 include/llvm/CodeGen/ScheduleDAG.h               | 127
 lib/CodeGen/SelectionDAG/ScheduleDAG.cpp         | 182
 lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp     |  52
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp   | 469
 lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp   |   4
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp |   2
 6 files changed, 658 insertions(+), 178 deletions(-)
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 17938d7..ad44b24 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -79,12 +79,13 @@ namespace llvm {
   /// SDep - Scheduling dependency. It keeps track of dependent nodes,
   /// cost of the dependency, etc.
   struct SDep {
-    SUnit *Dep;           // Dependent - either a predecessor or a successor.
-    bool isCtrl;          // True iff it's a control dependency.
-    unsigned PhyReg;      // If non-zero, this dep is a phy register dependency.
-    int Cost;             // Cost of the dependency.
-    SDep(SUnit *d, bool c, unsigned r, int t)
-      : Dep(d), isCtrl(c), PhyReg(r), Cost(t) {}
+    SUnit *Dep;           // Dependent - either a predecessor or a successor.
+    unsigned Reg;         // If non-zero, this dep is a physical reg dependency.
+    int Cost;             // Cost of the dependency.
+    bool isCtrl : 1;      // True iff it's a control dependency.
+    bool isSpecial : 1;   // True iff it's a special ctrl dep added during sched.
+    SDep(SUnit *d, unsigned r, int t, bool c, bool s)
+      : Dep(d), Reg(r), Cost(t), isCtrl(c), isSpecial(s) {}
   };

   /// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
@@ -92,6 +93,8 @@ namespace llvm {
   struct SUnit {
     SDNode *Node;                       // Representative node.
     SmallVector<SDNode*,4> FlaggedNodes;// All nodes flagged to Node.
+    unsigned InstanceNo;                // Instance#. One SDNode can map to
+                                        // multiple SUnits due to cloning.

     // Preds/Succs - The SUnits before/after us in the graph. The boolean value
     // is true if the edge is a token chain edge, false if it is a value edge.
@@ -103,6 +106,8 @@ namespace llvm {
     typedef SmallVector<SDep, 4>::const_iterator const_pred_iterator;
     typedef SmallVector<SDep, 4>::const_iterator const_succ_iterator;

+    unsigned NodeNum;                   // Entry # of node in the node vector.
+
     unsigned short Latency;             // Node latency.
     short NumPreds;                     // # of preds.
     short NumSuccs;                     // # of succs.
     short NumPredsLeft;                 // # of preds not scheduled.
@@ -111,42 +116,94 @@ namespace llvm {
     short NumChainSuccsLeft;            // # of chain succs not scheduled.
     bool isTwoAddress : 1;              // Is a two-address instruction.
     bool isCommutable : 1;              // Is a commutable instruction.
+    bool hasImplicitDefs : 1;           // Has implicit physical reg defs.
     bool isPending : 1;                 // True once pending.
     bool isAvailable : 1;               // True once available.
     bool isScheduled : 1;               // True once scheduled.
-    unsigned short Latency;             // Node latency.
     unsigned CycleBound;                // Upper/lower cycle to be scheduled at.
     unsigned Cycle;                     // Once scheduled, the cycle of the op.
     unsigned Depth;                     // Node depth.
     unsigned Height;                    // Node height.
-    unsigned NodeNum;                   // Entry # of node in the node vector.

     SUnit(SDNode *node, unsigned nodenum)
-      : Node(node), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
+      : Node(node), InstanceNo(0), NodeNum(nodenum), Latency(0),
+        NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
         NumChainPredsLeft(0), NumChainSuccsLeft(0),
-        isTwoAddress(false), isCommutable(false),
+        isTwoAddress(false), isCommutable(false), hasImplicitDefs(false),
         isPending(false), isAvailable(false), isScheduled(false),
-        Latency(0), CycleBound(0), Cycle(0), Depth(0), Height(0),
-        NodeNum(nodenum) {}
-
+        CycleBound(0), Cycle(0), Depth(0), Height(0) {}
+
     /// addPred - This adds the specified node as a pred of the current node if
     /// not already. This returns true if this is a new pred.
-    bool addPred(SUnit *N, bool isCtrl, unsigned PhyReg = 0, int Cost = 1) {
+    bool addPred(SUnit *N, bool isCtrl, bool isSpecial,
+                 unsigned PhyReg = 0, int Cost = 1) {
       for (unsigned i = 0, e = Preds.size(); i != e; ++i)
-        if (Preds[i].Dep == N && Preds[i].isCtrl == isCtrl)
+        if (Preds[i].Dep == N &&
+            Preds[i].isCtrl == isCtrl && Preds[i].isSpecial == isSpecial)
           return false;
-      Preds.push_back(SDep(N, isCtrl, PhyReg, Cost));
+      Preds.push_back(SDep(N, PhyReg, Cost, isCtrl, isSpecial));
+      N->Succs.push_back(SDep(this, PhyReg, Cost, isCtrl, isSpecial));
+      if (isCtrl) {
+        if (!N->isScheduled)
+          ++NumChainPredsLeft;
+        if (!isScheduled)
+          ++N->NumChainSuccsLeft;
+      } else {
+        ++NumPreds;
+        ++N->NumSuccs;
+        if (!N->isScheduled)
+          ++NumPredsLeft;
+        if (!isScheduled)
+          ++N->NumSuccsLeft;
+      }
       return true;
     }

-    /// addSucc - This adds the specified node as a succ of the current node if
-    /// not already. This returns true if this is a new succ.
-    bool addSucc(SUnit *N, bool isCtrl, unsigned PhyReg = 0, int Cost = 1) {
+    bool removePred(SUnit *N, bool isCtrl, bool isSpecial) {
+      for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+           I != E; ++I)
+        if (I->Dep == N && I->isCtrl == isCtrl && I->isSpecial == isSpecial) {
+          bool FoundSucc = false;
+          for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+                 EE = N->Succs.end(); II != EE; ++II)
+            if (II->Dep == this &&
+                II->isCtrl == isCtrl && II->isSpecial == isSpecial) {
+              FoundSucc = true;
+              N->Succs.erase(II);
+              break;
+            }
+          assert(FoundSucc && "Mismatching preds / succs lists!");
+          Preds.erase(I);
+          if (isCtrl) {
+            if (!N->isScheduled)
+              --NumChainPredsLeft;
+            if (!isScheduled)
+              --NumChainSuccsLeft;
+          } else {
+            --NumPreds;
+            --N->NumSuccs;
+            if (!N->isScheduled)
+              --NumPredsLeft;
+            if (!isScheduled)
+              --N->NumSuccsLeft;
+          }
+          return true;
+        }
+      return false;
+    }
+
+    bool isPred(SUnit *N) {
+      for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+        if (Preds[i].Dep == N)
+          return true;
+      return false;
+    }
+
+    bool isSucc(SUnit *N) {
       for (unsigned i = 0, e = Succs.size(); i != e; ++i)
-        if (Succs[i].Dep == N && Succs[i].isCtrl == isCtrl)
-          return false;
-      Succs.push_back(SDep(N, isCtrl, PhyReg, Cost));
-      return true;
+        if (Succs[i].Dep == N)
+          return true;
+      return false;
     }

     void dump(const SelectionDAG *G) const;
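
addPred now updates both endpoints' edge lists and all of the scheduling counters in one place, where the old addPred/addSucc pair left the counter bookkeeping to callers. A minimal standalone sketch of that invariant, using a simplified stand-in type (a hypothetical `Unit`, not LLVM's SUnit):

    #include <cassert>
    #include <vector>

    // Simplified stand-in for SUnit: one edge list per direction plus the
    // counters that addPred must keep in sync with those lists.
    struct Unit {
      std::vector<Unit*> Preds, Succs;
      int NumPreds = 0, NumSuccs = 0;

      bool addPred(Unit *N) {
        for (Unit *P : Preds)
          if (P == N)
            return false;             // duplicate edge: reject, change nothing
        Preds.push_back(N);
        N->Succs.push_back(this);     // mirror the edge on the other endpoint
        ++NumPreds;
        ++N->NumSuccs;
        return true;
      }
    };

    int main() {
      Unit A, B;
      B.addPred(&A);                  // edge A -> B
      assert(B.NumPreds == 1 && A.NumSuccs == 1);
      assert(!B.addPred(&A));         // second insertion is a no-op
    }
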
@@ -165,20 +222,27 @@ namespace llvm {
   public:
     virtual ~SchedulingPriorityQueue() {}

-    virtual void initNodes(DenseMap<SDNode*, SUnit*> &SUMap,
+    virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &SUMap,
                            std::vector<SUnit> &SUnits) = 0;
+    virtual void addNode(const SUnit *SU) = 0;
+    virtual void updateNode(const SUnit *SU) = 0;
     virtual void releaseState() = 0;
-
+
+    virtual unsigned size() const = 0;
     virtual bool empty() const = 0;
     virtual void push(SUnit *U) = 0;

     virtual void push_all(const std::vector<SUnit *> &Nodes) = 0;
     virtual SUnit *pop() = 0;

+    virtual void remove(SUnit *SU) = 0;
+
     /// ScheduledNode - As each node is scheduled, this method is invoked. This
     /// allows the priority function to adjust the priority of nodes that have
     /// already been emitted.
     virtual void ScheduledNode(SUnit *Node) {}
+
+    virtual void UnscheduledNode(SUnit *Node) {}
   };

   class ScheduleDAG {
@@ -192,7 +256,8 @@ namespace llvm {
     MachineConstantPool *ConstPool;       // Target constant pool
     std::vector<SUnit*> Sequence;         // The schedule. Null SUnit*'s
                                           // represent noop instructions.
-    DenseMap<SDNode*, SUnit*> SUnitMap;   // SDNode to SUnit mapping (n -> 1).
+    DenseMap<SDNode*, std::vector<SUnit*> > SUnitMap;
+                                          // SDNode to SUnit mapping (n -> n).
     std::vector<SUnit> SUnits;            // The scheduling units.
     SmallSet<SDNode*, 16> CommuteSet;     // Nodes that should be commuted.
@@ -232,6 +297,10 @@ namespace llvm {
       return &SUnits.back();
     }

+    /// Clone - Creates a clone of the specified SUnit. It does not copy the
+    /// predecessors / successors info nor the temporary scheduling states.
+    SUnit *Clone(SUnit *N);
+
     /// BuildSchedUnits - Build SUnits from the input selection dag.
     /// This SUnit graph is similar to the SelectionDAG, but represents flagged
     /// together nodes with a single SUnit.
@@ -256,7 +325,8 @@ namespace llvm {
     /// VRBaseMap contains, for each already emitted node, the first virtual
     /// register number for the results of the node.
     ///
-    void EmitNode(SDNode *Node, DenseMap<SDOperand, unsigned> &VRBaseMap);
+    void EmitNode(SDNode *Node, unsigned InstNo,
+                  DenseMap<SDOperand, unsigned> &VRBaseMap);

     /// EmitNoop - Emit a noop instruction.
     ///
@@ -264,7 +334,8 @@ namespace llvm {

     /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
     /// implicit physical register output.
-    void EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned SrcReg,
+    void EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned InstNo,
+                         unsigned SrcReg,
                          DenseMap<SDOperand, unsigned> &VRBaseMap);

     void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
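
With cloning, the SDNode-to-SUnit mapping becomes one-to-many: entry 0 of each vector is the SUnit originally built for the node, and each clone appends itself, so an SUnit's InstanceNo doubles as its index into the vector. A sketch of the lookup pattern under that assumption (simplified to std::map; the real code uses DenseMap):

    #include <map>
    #include <vector>

    struct SDNode;
    struct SUnit { unsigned InstanceNo; };

    // Value type is now a vector: [0] is the original SUnit for the node,
    // later slots are clones, so InstanceNo doubles as the vector index.
    typedef std::map<SDNode*, std::vector<SUnit*> > SUnitMapTy;

    SUnit *lookup(SUnitMapTy &M, SDNode *N, unsigned InstanceNo) {
      std::vector<SUnit*> &V = M[N];
      return InstanceNo < V.size() ? V[InstanceNo] : 0;
    }
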
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
index 1346595..b775122 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
@@ -27,6 +27,65 @@
 #include "llvm/Support/MathExtras.h"
 using namespace llvm;

+/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
+/// register.
+static const TargetRegisterClass *getPhysicalRegisterRegClass(
+        const MRegisterInfo *MRI,
+        MVT::ValueType VT,
+        unsigned reg) {
+  assert(MRegisterInfo::isPhysicalRegister(reg) &&
+         "reg must be a physical register");
+  // Pick the register class of the right type that contains this physreg.
+  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
+         E = MRI->regclass_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->contains(reg))
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *Use, unsigned Op,
+                                      const MRegisterInfo *MRI,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &PhysReg, int &Cost) {
+  if (Op != 2 || Use->getOpcode() != ISD::CopyToReg)
+    return;
+
+  unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+  if (MRegisterInfo::isVirtualRegister(Reg))
+    return;
+
+  unsigned ResNo = Use->getOperand(2).ResNo;
+  if (Def->isTargetOpcode()) {
+    const TargetInstrDescriptor &II = TII->get(Def->getTargetOpcode());
+    if (ResNo >= II.numDefs &&
+        II.ImplicitDefs[ResNo - II.numDefs] == Reg) {
+      PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        getPhysicalRegisterRegClass(MRI, Def->getValueType(ResNo), Reg);
+      Cost = RC->getCopyCost();
+    }
+  }
+}
+
+SUnit *ScheduleDAG::Clone(SUnit *Old) {
+  SUnit *SU = NewSUnit(Old->Node);
+  for (unsigned i = 0, e = Old->FlaggedNodes.size(); i != e; ++i)
+    SU->FlaggedNodes.push_back(Old->FlaggedNodes[i]);
+  SU->InstanceNo = SUnitMap[Old->Node].size();
+  SU->Latency = Old->Latency;
+  SU->isTwoAddress = Old->isTwoAddress;
+  SU->isCommutable = Old->isCommutable;
+  SU->hasImplicitDefs = Old->hasImplicitDefs;
+  SUnitMap[Old->Node].push_back(SU);
+  return SU;
+}
+
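
CheckForPhysRegDependency only classifies an edge as a physical-register dependency when the use is a CopyToReg of a physical register that the defining node produces as an implicit def, and the cost it reports is that register class's copy cost; a negative cost conventionally marks classes that are impossible or very expensive to copy (condition-code registers are the typical case). A condensed sketch of that decision shape, with hypothetical booleans standing in for the SDNode queries:

    #include <cstdio>

    // Shape of the check: a physreg dependency exists only when the use is a
    // CopyToReg of a physical register that the defining node produces as an
    // implicit def; its cost is the register class's copy cost, where a
    // negative value means "impossible or very expensive to copy".
    struct DepInfo { unsigned PhysReg; int Cost; };

    DepInfo classify(unsigned reg, bool useIsCopyToReg, bool regIsPhysical,
                     bool defImplicitlyDefinesReg, int classCopyCost) {
      DepInfo D = { 0, 1 };                 // default: ordinary data edge
      if (useIsCopyToReg && regIsPhysical && defImplicitlyDefinesReg) {
        D.PhysReg = reg;                    // non-zero marks a physreg dep
        D.Cost = classCopyCost;             // e.g. negative for a CC register
      }
      return D;
    }

    int main() {
      DepInfo D = classify(7, true, true, true, -1);
      std::printf("reg %u, cost %d\n", D.PhysReg, D.Cost);
    }
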
 /// BuildSchedUnits - Build SUnits from the input selection dag.
 /// This SUnit graph is similar to the SelectionDAG, but represents flagged
 /// together nodes with a single SUnit.
@@ -44,7 +103,7 @@ void ScheduleDAG::BuildSchedUnits() {
       continue;

     // If this node has already been processed, stop now.
-    if (SUnitMap[NI]) continue;
+    if (SUnitMap[NI].size()) continue;

     SUnit *NodeSUnit = NewSUnit(NI);

@@ -59,7 +118,7 @@ void ScheduleDAG::BuildSchedUnits() {
       do {
         N = N->getOperand(N->getNumOperands()-1).Val;
         NodeSUnit->FlaggedNodes.push_back(N);
-        SUnitMap[N] = NodeSUnit;
+        SUnitMap[N].push_back(NodeSUnit);
       } while (N->getNumOperands() &&
               N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
      std::reverse(NodeSUnit->FlaggedNodes.begin(),
@@ -79,7 +138,7 @@ void ScheduleDAG::BuildSchedUnits() {
         if (FlagVal.isOperand(*UI)) {
           HasFlagUse = true;
           NodeSUnit->FlaggedNodes.push_back(N);
-          SUnitMap[N] = NodeSUnit;
+          SUnitMap[N].push_back(NodeSUnit);
           N = *UI;
           break;
         }
@@ -89,7 +148,7 @@ void ScheduleDAG::BuildSchedUnits() {
     // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
     // Update the SUnit.
     NodeSUnit->Node = N;
-    SUnitMap[N] = NodeSUnit;
+    SUnitMap[N].push_back(NodeSUnit);

     // Compute the latency for the node. We use the sum of the latencies for
     // all nodes flagged together into this SUnit.
@@ -125,13 +184,16 @@ void ScheduleDAG::BuildSchedUnits() {

     if (MainNode->isTargetOpcode()) {
       unsigned Opc = MainNode->getTargetOpcode();
-      for (unsigned i = 0, ee = TII->getNumOperands(Opc); i != ee; ++i) {
-        if (TII->getOperandConstraint(Opc, i, TOI::TIED_TO) != -1) {
+      const TargetInstrDescriptor &TID = TII->get(Opc);
+      if (TID.ImplicitDefs)
+        SU->hasImplicitDefs = true;
+      for (unsigned i = 0; i != TID.numOperands; ++i) {
+        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
           SU->isTwoAddress = true;
           break;
         }
       }
-      if (TII->isCommutableInstr(Opc))
+      if (TID.Flags & M_COMMUTABLE)
         SU->isCommutable = true;
     }

@@ -141,34 +203,25 @@ void ScheduleDAG::BuildSchedUnits() {
     for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
       SDNode *N = SU->FlaggedNodes[n];
+      if (N->isTargetOpcode() && TII->getImplicitDefs(N->getTargetOpcode()))
+        SU->hasImplicitDefs = true;

       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
         SDNode *OpN = N->getOperand(i).Val;
         if (isPassiveNode(OpN)) continue;   // Not scheduled.
-        SUnit *OpSU = SUnitMap[OpN];
+        SUnit *OpSU = SUnitMap[OpN].front();
         assert(OpSU && "Node has no SUnit!");
         if (OpSU == SU) continue;           // In the same group.

         MVT::ValueType OpVT = N->getOperand(i).getValueType();
         assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
         bool isChain = OpVT == MVT::Other;
-
-        if (SU->addPred(OpSU, isChain)) {
-          if (!isChain) {
-            SU->NumPreds++;
-            SU->NumPredsLeft++;
-          } else {
-            SU->NumChainPredsLeft++;
-          }
-        }
-        if (OpSU->addSucc(SU, isChain)) {
-          if (!isChain) {
-            OpSU->NumSuccs++;
-            OpSU->NumSuccsLeft++;
-          } else {
-            OpSU->NumChainSuccsLeft++;
-          }
-        }
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, MRI, TII, PhysReg, Cost);
+        SU->addPred(OpSU, isChain, false, PhysReg, Cost);
       }
     }

@@ -200,7 +253,7 @@ void ScheduleDAG::CalculateDepths() {
 void ScheduleDAG::CalculateHeights() {
   std::vector<std::pair<SUnit*, unsigned> > WorkList;
-  SUnit *Root = SUnitMap[DAG.getRoot().Val];
+  SUnit *Root = SUnitMap[DAG.getRoot().Val].front();
   WorkList.push_back(std::make_pair(Root, 0U));

   while (!WorkList.empty()) {
@@ -254,27 +307,14 @@ static const TargetRegisterClass *getInstrOperandRegClass(
            ? TII->getPointerRegClass()
            : MRI->getRegClass(toi.RegClass);
 }

-// Returns the Register Class of a physical register
-static const TargetRegisterClass *getPhysicalRegisterRegClass(
-        const MRegisterInfo *MRI,
-        MVT::ValueType VT,
-        unsigned reg) {
-  assert(MRegisterInfo::isPhysicalRegister(reg) &&
-         "reg must be a physical register");
-  // Pick the register class of the right type that contains this physreg.
-  for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
-         E = MRI->regclass_end(); I != E; ++I)
-    if ((*I)->hasType(VT) && (*I)->contains(reg))
-      return *I;
-  assert(false && "Couldn't find the register class");
-  return 0;
-}
-
-void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned SrcReg,
+void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+                                  unsigned InstanceNo, unsigned SrcReg,
                                   DenseMap<SDOperand, unsigned> &VRBaseMap) {
   unsigned VRBase = 0;
   if (MRegisterInfo::isVirtualRegister(SrcReg)) {
     // Just use the input register directly!
+    if (InstanceNo > 0)
+      VRBaseMap.erase(SDOperand(Node, ResNo));
     bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,ResNo),SrcReg));
     assert(isNew && "Node emitted out of order - early");
     return;
@@ -282,32 +322,54 @@ void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo, unsigned SrcReg,

   // If the node is only used by a CopyToReg and the dest reg is a vreg, use
   // the CopyToReg'd destination register instead of creating a new vreg.
+  bool MatchReg = true;
   for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
        UI != E; ++UI) {
     SDNode *Use = *UI;
+    bool Match = true;
     if (Use->getOpcode() == ISD::CopyToReg &&
         Use->getOperand(2).Val == Node &&
         Use->getOperand(2).ResNo == ResNo) {
       unsigned DestReg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
       if (MRegisterInfo::isVirtualRegister(DestReg)) {
         VRBase = DestReg;
-        break;
+        Match = false;
+      } else if (DestReg != SrcReg)
+        Match = false;
+    } else {
+      for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+        SDOperand Op = Use->getOperand(i);
+        if (Op.Val != Node)
+          continue;
+        MVT::ValueType VT = Node->getValueType(Op.ResNo);
+        if (VT != MVT::Other && VT != MVT::Flag)
+          Match = false;
       }
     }
+    MatchReg &= Match;
+    if (VRBase)
+      break;
   }

-  // Figure out the register class to create for the destreg.
   const TargetRegisterClass *TRC = 0;
-  if (VRBase) {
+  // Figure out the register class to create for the destreg.
+  if (VRBase)
     TRC = RegMap->getRegClass(VRBase);
-  } else {
+  else
     TRC = getPhysicalRegisterRegClass(MRI, Node->getValueType(ResNo), SrcReg);
-
+
+  // If all uses are reading from the src physical register and copying the
+  // register is either impossible or very expensive, then don't create a copy.
+  if (MatchReg && TRC->getCopyCost() < 0) {
+    VRBase = SrcReg;
+  } else {
     // Create the reg, emit the copy.
     VRBase = RegMap->createVirtualRegister(TRC);
+    MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);
   }
-  MRI->copyRegToReg(*BB, BB->end(), VRBase, SrcReg, TRC);

+  if (InstanceNo > 0)
+    VRBaseMap.erase(SDOperand(Node, ResNo));
   bool isNew = VRBaseMap.insert(std::make_pair(SDOperand(Node,ResNo), VRBase));
   assert(isNew && "Node emitted out of order - early");
 }
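
The rewritten EmitCopyFromReg can now skip the copy entirely: if every use just reads the source physical register in place and the class's copy cost is negative, SrcReg itself becomes the VRBase. A condensed sketch of that choice (a hypothetical helper, not part of the patch):

    // Condensed decision from the hunk above: keep the value in the source
    // physical register when every use can read it in place and copying is
    // impossible or very expensive (negative copy cost); otherwise the caller
    // creates a fresh vreg and emits exactly one copy into it.
    unsigned chooseVRBase(bool allUsesMatchSrcReg, int copyCost,
                          unsigned srcReg, unsigned freshVReg) {
      if (allUsesMatchSrcReg && copyCost < 0)
        return srcReg;       // no copy emitted at all
      return freshVReg;      // caller then copies srcReg into freshVReg
    }
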
@@ -611,7 +673,7 @@ void ScheduleDAG::EmitSubregNode(SDNode *Node,

 /// EmitNode - Generate machine code for a node and needed dependencies.
 ///
-void ScheduleDAG::EmitNode(SDNode *Node,
+void ScheduleDAG::EmitNode(SDNode *Node, unsigned InstanceNo,
                            DenseMap<SDOperand, unsigned> &VRBaseMap) {
   // If machine instruction
   if (Node->isTargetOpcode()) {
@@ -677,7 +739,7 @@ void ScheduleDAG::EmitNode(SDNode *Node,
       for (unsigned i = II.numDefs; i < NumResults; ++i) {
         unsigned Reg = II.ImplicitDefs[i - II.numDefs];
         if (Node->hasAnyUseOfValue(i))
-          EmitCopyFromReg(Node, i, Reg, VRBaseMap);
+          EmitCopyFromReg(Node, i, InstanceNo, Reg, VRBaseMap);
       }
     }
   } else {
@@ -713,7 +775,7 @@ void ScheduleDAG::EmitNode(SDNode *Node,
     }
     case ISD::CopyFromReg: {
       unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
-      EmitCopyFromReg(Node, 0, SrcReg, VRBaseMap);
+      EmitCopyFromReg(Node, 0, InstanceNo, SrcReg, VRBaseMap);
       break;
     }
     case ISD::INLINEASM: {
@@ -802,9 +864,9 @@ void ScheduleDAG::EmitSchedule() {
   DenseMap<SDOperand, unsigned> VRBaseMap;
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     if (SUnit *SU = Sequence[i]) {
-      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
-        EmitNode(SU->FlaggedNodes[j], VRBaseMap);
-      EmitNode(SU->Node, VRBaseMap);
+      for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; ++j)
+        EmitNode(SU->FlaggedNodes[j], SU->InstanceNo, VRBaseMap);
+      EmitNode(SU->Node, SU->InstanceNo, VRBaseMap);
     } else {
       // Null SUnit* is a noop.
       EmitNoop();
@@ -869,7 +931,10 @@ void SUnit::dumpAll(const SelectionDAG *G) const {
         cerr << " ch  #";
       else
         cerr << " val #";
-      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")\n";
+      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
+      if (I->isSpecial)
+        cerr << " *";
+      cerr << "\n";
     }
   }
   if (Succs.size() != 0) {
@@ -880,7 +945,10 @@ void SUnit::dumpAll(const SelectionDAG *G) const {
         cerr << " ch  #";
       else
         cerr << " val #";
-      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")\n";
+      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
+      if (I->isSpecial)
+        cerr << " *";
+      cerr << "\n";
     }
   }
   cerr << "\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 7b09749..33d7910 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -168,7 +168,7 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// schedulers.
 void ScheduleDAGList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
-  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

   // All leaves to Available queue.
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
@@ -328,12 +328,24 @@ public:

   LatencyPriorityQueue() : Queue(latency_sort(this)) {
   }

-  void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+  void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                  std::vector<SUnit> &sunits) {
     SUnits = &sunits;
     // Calculate node priorities.
     CalculatePriorities();
   }
+
+  void addNode(const SUnit *SU) {
+    Latencies.resize(SUnits->size(), -1);
+    NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+    CalcLatency(*SU);
+  }
+
+  void updateNode(const SUnit *SU) {
+    Latencies[SU->NodeNum] = -1;
+    CalcLatency(*SU);
+  }
+
   void releaseState() {
     SUnits = 0;
     Latencies.clear();
@@ -349,6 +361,8 @@ public:
     return NumNodesSolelyBlocking[NodeNum];
   }

+  unsigned size() const { return Queue.size(); }
+
   bool empty() const { return Queue.empty(); }

   virtual void push(SUnit *U) {
@@ -368,22 +382,10 @@ public:
     return V;
   }

-  // ScheduledNode - As nodes are scheduled, we look to see if there are any
-  // successor nodes that have a single unscheduled predecessor.  If so, that
-  // single predecessor has a higher priority, since scheduling it will make
-  // the node available.
-  void ScheduledNode(SUnit *Node);
-
-private:
-  void CalculatePriorities();
-  int CalcLatency(const SUnit &SU);
-  void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
-  SUnit *getSingleUnscheduledPred(SUnit *SU);
-
-  /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
-  /// node from a priority queue.  We should roll our own heap to make this
-  /// better or something.
-  void RemoveFromPriorityQueue(SUnit *SU) {
+  /// remove - This is a really inefficient way to remove a node from a
+  /// priority queue.  We should roll our own heap to make this better or
+  /// something.
+  void remove(SUnit *SU) {
     std::vector<SUnit*> Temp;

     assert(!Queue.empty() && "Not in queue!");
@@ -400,6 +402,18 @@ private:
     for (unsigned i = 0, e = Temp.size(); i != e; ++i)
       Queue.push(Temp[i]);
   }
+
+  // ScheduledNode - As nodes are scheduled, we look to see if there are any
+  // successor nodes that have a single unscheduled predecessor.  If so, that
+  // single predecessor has a higher priority, since scheduling it will make
+  // the node available.
+  void ScheduledNode(SUnit *Node);
+
+private:
+  void CalculatePriorities();
+  int CalcLatency(const SUnit &SU);
+  void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+  SUnit *getSingleUnscheduledPred(SUnit *SU);
 };
 }
@@ -507,7 +521,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {

   // Okay, we found a single predecessor that is available, but not scheduled.
   // Since it is available, it must be in the priority queue.  First remove it.
-  RemoveFromPriorityQueue(OnlyAvailablePred);
+  remove(OnlyAvailablePred);

   // Reinsert the node into the priority queue, which recomputes its
   // NumNodesSolelyBlocking value.
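
remove() drains the heap until it finds the victim and then pushes everything back, O(n log n) per removal, and the comment explicitly asks for a better heap (the RegReductionPriorityQueue below duplicates the same workaround). One standard alternative that needs no custom heap is lazy deletion: removal just clears a flag and pop() discards stale entries. A sketch of that approach (hypothetical isQueued flag and stand-in priority function; it assumes an element is not re-pushed while a stale entry for it is still queued):

    #include <queue>
    #include <vector>

    struct SUnit { unsigned NodeNum; bool isQueued; };

    // Lazy deletion: remove() clears a flag instead of restructuring the
    // heap; pop() skips entries whose flag was cleared. Each element is
    // pushed once, so removal is O(1) and pop stays O(log n) amortized.
    class LazyQueue {
      struct Cmp {
        bool operator()(const SUnit *A, const SUnit *B) const {
          return A->NodeNum < B->NodeNum;   // stand-in priority function
        }
      };
      std::priority_queue<SUnit*, std::vector<SUnit*>, Cmp> Q;
    public:
      void push(SUnit *SU)   { SU->isQueued = true; Q.push(SU); }
      void remove(SUnit *SU) { SU->isQueued = false; }
      SUnit *pop() {
        while (!Q.empty()) {
          SUnit *SU = Q.top();
          Q.pop();
          if (SU->isQueued) { SU->isQueued = false; return SU; }
        }
        return 0;
      }
    };
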
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index ea23c27..0228d8a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include <climits>
 #include <queue>
@@ -52,9 +53,16 @@ private:
   bool isBottomUp;

   /// AvailableQueue - The priority queue to use for the available SUnits.
   ///
   SchedulingPriorityQueue *AvailableQueue;
+
+  /// LiveRegs / LiveRegDefs - A set of physical registers and their
+  /// definitions that are "live". These nodes must be scheduled before any
+  /// other nodes that modify the registers can be scheduled.
+  SmallSet<unsigned, 4> LiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<unsigned> LiveRegCycles;
+
 public:
   ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                     const TargetMachine &tm, bool isbottomup,
@@ -72,8 +80,13 @@ public:
 private:
   void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
   void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle);
+  void CapturePred(SUnit *PredSU, SUnit *SU, bool isChain);
   void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle);
   void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void UnscheduleNodeBottomUp(SUnit *SU);
+  SUnit *BackTrackBottomUp(SUnit*, unsigned, unsigned&, bool&);
+  SUnit *CopyAndMoveSuccessors(SUnit *SU);
+  bool DelayForLiveRegsBottomUp(SUnit *SU, unsigned &CurCycle);
   void ListScheduleTopDown();
   void ListScheduleBottomUp();
   void CommuteNodesToReducePressure();
@@ -84,7 +97,10 @@ private:
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
   DOUT << "********** List Scheduling **********\n";
-
+
+  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
+  LiveRegCycles.resize(MRI->getNumRegs(), 0);
+
   // Build scheduling units.
   BuildSchedUnits();
@@ -130,7 +146,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() {
           continue;

         SDNode *OpN = SU->Node->getOperand(j).Val;
-        SUnit *OpSU = SUnitMap[OpN];
+        SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
         if (OpSU && OperandSeen.count(OpSU) == 1) {
           // Ok, so SU is not the last use of OpSU, but SU is two-address so
           // it will clobber OpSU. Try to commute SU if no other source operands
@@ -139,7 +155,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() {
             for (unsigned k = 0; k < NumOps; ++k) {
               if (k != j) {
                 OpN = SU->Node->getOperand(k).Val;
-                OpSU = SUnitMap[OpN];
+                OpSU = SUnitMap[OpN][SU->InstanceNo];
                 if (OpSU && OperandSeen.count(OpSU) == 1) {
                   DoCommute = false;
                   break;
@@ -178,9 +194,9 @@ void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
   PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

   if (!isChain)
-    PredSU->NumSuccsLeft--;
+    --PredSU->NumSuccsLeft;
   else
-    PredSU->NumChainSuccsLeft--;
+    --PredSU->NumChainSuccsLeft;

 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
@@ -209,19 +225,273 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
   SU->Cycle = CurCycle;

   AvailableQueue->ScheduledNode(SU);
-  Sequence.push_back(SU);

   // Bottom up: release predecessors
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I)
+       I != E; ++I) {
     ReleasePred(I->Dep, I->isCtrl, CurCycle);
+    if (I->Cost < 0) {
+      // This is a physical register dependency and it's impossible or
+      // expensive to copy the register. Make sure nothing that can
+      // clobber the register is scheduled between the predecessor and
+      // this node.
+      if (LiveRegs.insert(I->Reg)) {
+        LiveRegDefs[I->Reg] = I->Dep;
+        LiveRegCycles[I->Reg] = CurCycle;
+      }
+    }
+  }
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->Cost < 0) {
+      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
+        LiveRegs.erase(I->Reg);
+        assert(LiveRegDefs[I->Reg] == SU &&
+               "Physical register dependency violated?");
+        LiveRegDefs[I->Reg] = NULL;
+        LiveRegCycles[I->Reg] = 0;
+      }
+    }
+  }
+
   SU->isScheduled = true;
 }
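
ScheduleNodeBottomUp now pins each expensive-to-copy physical register from the cycle its use is scheduled until its defining node is scheduled, via the three structures added above. A minimal model of that bookkeeping (simplified containers and hypothetical names, not the patch's code):

    #include <set>
    #include <vector>

    struct SUnit;

    // Mirror of the three members added above: the set of currently live
    // physregs, the node that must (re)define each one, and the cycle at
    // which it became live.
    struct LiveRegTracker {
      std::set<unsigned> LiveRegs;
      std::vector<SUnit*> LiveRegDefs;
      std::vector<unsigned> LiveRegCycles;

      explicit LiveRegTracker(unsigned NumRegs)
        : LiveRegDefs(NumRegs, 0), LiveRegCycles(NumRegs, 0) {}

      // Scheduling a use of an uncopyable physreg pins the register until
      // its defining node Dep is finally scheduled.
      void pin(unsigned Reg, SUnit *Dep, unsigned Cycle) {
        if (LiveRegs.insert(Reg).second) {
          LiveRegDefs[Reg] = Dep;
          LiveRegCycles[Reg] = Cycle;
        }
      }

      // True if scheduling a node that defines Reg right now would clobber
      // a value an already-scheduled node still expects to read.
      bool wouldClobber(unsigned Reg, const SUnit *Def) const {
        return LiveRegs.count(Reg) && LiveRegDefs[Reg] != Def;
      }
    };
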
-/// isReady - True if node's lower cycle bound is less or equal to the current
-/// scheduling cycle. Always true if all nodes have uniform latency 1.
-static inline bool isReady(SUnit *SU, unsigned CurCycle) {
-  return SU->CycleBound <= CurCycle;
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
+  PredSU->CycleBound = 0;
+  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
+       I != E; ++I) {
+    if (I->Dep == SU)
+      continue;
+    PredSU->CycleBound = std::max(PredSU->CycleBound,
+                                  I->Dep->Cycle + PredSU->Latency);
+  }
+
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  if (!isChain)
+    ++PredSU->NumSuccsLeft;
+  else
+    ++PredSU->NumChainSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
+  DEBUG(SU->dump(&DAG));
+
+  AvailableQueue->UnscheduledNode(SU);
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(I->Dep, SU, I->isCtrl);
+    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
+      LiveRegs.erase(I->Reg);
+      assert(LiveRegDefs[I->Reg] == I->Dep &&
+             "Physical register dependency violated?");
+      LiveRegDefs[I->Reg] = NULL;
+      LiveRegCycles[I->Reg] = 0;
+    }
+  }
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->Cost < 0) {
+      if (LiveRegs.insert(I->Reg)) {
+        assert(!LiveRegDefs[I->Reg] &&
+               "Physical register dependency violated?");
+        LiveRegDefs[I->Reg] = SU;
+      }
+      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
+        LiveRegCycles[I->Reg] = I->Dep->Cycle;
+    }
+  }
+
+  SU->Cycle = 0;
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  AvailableQueue->push(SU);
+}
+
+/// BackTrackBottomUp - Back track scheduling to a previous cycle specified in
+/// BTCycle in order to schedule a specific node. Returns the last unscheduled
+/// SUnit. Also returns whether a successor was unscheduled in the process.
+SUnit *ScheduleDAGRRList::BackTrackBottomUp(SUnit *SU, unsigned BTCycle,
+                                            unsigned &CurCycle,
+                                            bool &SuccUnsched) {
+  SuccUnsched = false;
+  SUnit *OldSU = NULL;
+  while (CurCycle > BTCycle) {
+    OldSU = Sequence.back();
+    Sequence.pop_back();
+    if (SU->isSucc(OldSU))
+      SuccUnsched = true;
+    UnscheduleNodeBottomUp(OldSU);
+    --CurCycle;
+  }
+
+  if (SU->isSucc(OldSU)) {
+    assert(false && "Something is wrong!");
+    abort();
+  }
+
+  return OldSU;
+}
+
+/// isSafeToCopy - True if the SUnit for the given SDNode can safely be cloned,
+/// i.e. the node does not produce a flag, it does not read a flag and it does
+/// not have an incoming chain.
+static bool isSafeToCopy(SDNode *N) {
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    if (N->getValueType(i) == MVT::Flag)
+      return false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDOperand &Op = N->getOperand(i);
+    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
+    if (VT == MVT::Other || VT == MVT::Flag)
+      return false;
+  }
+
+  return true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+  SUnit *NewSU = Clone(SU);
+
+  // New SUnit has the exact same predecessors.
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I)
+    if (!I->isSpecial) {
+      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
+      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
+    }
+
+  // Only copy scheduled successors. Cut them from the old node's successor
+  // list and move them over. Record them by (node, isCtrl) value rather than
+  // by SDep*, since removePred below erases from SU->Succs and would
+  // invalidate pointers into it.
+  SmallVector<std::pair<SUnit*, bool>, 2> DelDeps;
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isSpecial)
+      continue;
+    NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
+    if (I->Dep->isScheduled) {
+      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
+      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
+    }
+  }
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+    SUnit *Succ = DelDeps[i].first;
+    bool isCtrl = DelDeps[i].second;
+    Succ->removePred(SU, isCtrl, false);
+  }
+
+  AvailableQueue->updateNode(SU);
+  AvailableQueue->addNode(NewSU);
+
+  return NewSU;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register
+/// dependencies. If the specified node is the last one that's available to
+/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
+/// it possible.
+bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, unsigned &CurCycle){
+  if (LiveRegs.empty())
+    return false;
+
+  // If this node would clobber any "live" register, then it's not ready.
+  // However, if this is the last "available" node, then we may have to
+  // backtrack.
+  bool MustSched = AvailableQueue->empty();
+  SmallVector<unsigned, 4> LRegs;
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->Cost < 0) {
+      unsigned Reg = I->Reg;
+      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep)
+        LRegs.push_back(Reg);
+      for (const unsigned *Alias = MRI->getAliasSet(Reg);
+           *Alias; ++Alias)
+        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep)
+          LRegs.push_back(*Alias);
+    }
+  }
+
+  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
+    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
+    if (!Node->isTargetOpcode())
+      continue;
+    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
+    if (!TID.ImplicitDefs)
+      continue;
+    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU)
+        LRegs.push_back(*Reg);
+      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
+           *Alias; ++Alias)
+        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU)
+          LRegs.push_back(*Alias);
+    }
+  }
+
+  if (MustSched && !LRegs.empty()) {
+    // We have made a mistake by scheduling some nodes too early. Now we must
+    // schedule the current node which will end up clobbering some live
+    // registers that are expensive / impossible to copy. Try unscheduling
+    // up to the point where it's safe to schedule the current node.
+    unsigned LiveCycle = CurCycle;
+    for (unsigned i = 0, e = LRegs.size(); i != e; ++i) {
+      unsigned Reg = LRegs[i];
+      unsigned LCycle = LiveRegCycles[Reg];
+      LiveCycle = std::min(LiveCycle, LCycle);
+    }
+
+    if (SU->CycleBound < LiveCycle) {
+      bool SuccUnsched = false;
+      SUnit *OldSU = BackTrackBottomUp(SU, LiveCycle, CurCycle, SuccUnsched);
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (OldSU->isAvailable) {
+        OldSU->isAvailable = false;
+        AvailableQueue->remove(OldSU);
+      }
+      SU->addPred(OldSU, true, true);
+      // If a successor has been unscheduled, then it's not possible to
+      // schedule the current node.
+      return SuccUnsched;
+    } else {
+      // Try duplicating the nodes that produce these "expensive to copy"
+      // values to break the dependency.
+      for (unsigned i = 0, e = LRegs.size(); i != e; ++i) {
+        unsigned Reg = LRegs[i];
+        SUnit *LRDef = LiveRegDefs[Reg];
+        if (isSafeToCopy(LRDef->Node)) {
+          SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+          LiveRegDefs[Reg] = NewDef;
+          NewDef->addPred(SU, true, true);
+          SU->isAvailable = false;
+          AvailableQueue->push(NewDef);
+        } else {
+          assert(false && "Expensive copying is required?");
+          abort();
+        }
+      }
+      return true;
+    }
+  }
+  return !LRegs.empty();
 }

 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
@@ -229,30 +499,49 @@ static inline bool isReady(SUnit *SU, unsigned CurCycle) {
 void ScheduleDAGRRList::ListScheduleBottomUp() {
   unsigned CurCycle = 0;
   // Add root to Available queue.
-  AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
+  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
+  RootSU->isAvailable = true;
+  AvailableQueue->push(RootSU);

   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back. Schedule the node.
-  std::vector<SUnit*> NotReady;
+  SmallVector<SUnit*, 4> NotReady;
   while (!AvailableQueue->empty()) {
-    SUnit *CurNode = AvailableQueue->pop();
-    while (CurNode && !isReady(CurNode, CurCycle)) {
-      NotReady.push_back(CurNode);
-      CurNode = AvailableQueue->pop();
+    SUnit *CurSU = AvailableQueue->pop();
+    while (CurSU) {
+      if (CurSU->CycleBound <= CurCycle)
+        if (!DelayForLiveRegsBottomUp(CurSU, CurCycle))
+          break;
+
+      // Verify node is still ready. It may not be in case the
+      // scheduler has backtracked.
+      if (CurSU->isAvailable) {
+        CurSU->isPending = true;
+        NotReady.push_back(CurSU);
+      }
+      CurSU = AvailableQueue->pop();
     }

     // Add the nodes that aren't ready back onto the available list.
-    AvailableQueue->push_all(NotReady);
+    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+      NotReady[i]->isPending = false;
+      if (NotReady[i]->isAvailable)
+        AvailableQueue->push(NotReady[i]);
+    }
     NotReady.clear();

-    if (CurNode != NULL)
-      ScheduleNodeBottomUp(CurNode, CurCycle);
-    CurCycle++;
+    if (!CurSU)
+      Sequence.push_back(0);
+    else {
+      ScheduleNodeBottomUp(CurSU, CurCycle);
+      Sequence.push_back(CurSU);
+    }
+    ++CurCycle;
   }

   // Add entry node last
   if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
-    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
     Sequence.push_back(Entry);
   }

@@ -291,9 +580,9 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
   SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

   if (!isChain)
-    SuccSU->NumPredsLeft--;
+    --SuccSU->NumPredsLeft;
   else
-    SuccSU->NumChainPredsLeft--;
+    --SuccSU->NumChainPredsLeft;

 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) {
@@ -320,7 +609,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
   SU->Cycle = CurCycle;

   AvailableQueue->ScheduledNode(SU);
-  Sequence.push_back(SU);

   // Top down: release successors
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -333,7 +621,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 /// schedulers.
 void ScheduleDAGRRList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
-  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
+  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

   // All leaves to Available queue.
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
@@ -346,24 +634,29 @@ void ScheduleDAGRRList::ListScheduleTopDown() {

   // Emit the entry node first.
   ScheduleNodeTopDown(Entry, CurCycle);
-  CurCycle++;
+  Sequence.push_back(Entry);
+  ++CurCycle;

   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back. Schedule the node.
   std::vector<SUnit*> NotReady;
   while (!AvailableQueue->empty()) {
-    SUnit *CurNode = AvailableQueue->pop();
-    while (CurNode && !isReady(CurNode, CurCycle)) {
-      NotReady.push_back(CurNode);
-      CurNode = AvailableQueue->pop();
+    SUnit *CurSU = AvailableQueue->pop();
+    while (CurSU && CurSU->CycleBound > CurCycle) {
+      NotReady.push_back(CurSU);
+      CurSU = AvailableQueue->pop();
     }

     // Add the nodes that aren't ready back onto the available list.
     AvailableQueue->push_all(NotReady);
     NotReady.clear();

-    if (CurNode != NULL)
-      ScheduleNodeTopDown(CurNode, CurCycle);
+    if (!CurSU)
+      Sequence.push_back(0);
+    else {
+      ScheduleNodeTopDown(CurSU, CurCycle);
+      Sequence.push_back(CurSU);
+    }
     CurCycle++;
   }

@@ -431,14 +724,21 @@ namespace {
     RegReductionPriorityQueue() : Queue(SF(this)) {}

-    virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+    virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                            std::vector<SUnit> &sunits) {}
+
+    virtual void addNode(const SUnit *SU) {}
+
+    virtual void updateNode(const SUnit *SU) {}
+
     virtual void releaseState() {}

     virtual unsigned getNodePriority(const SUnit *SU) const {
       return 0;
     }

+    unsigned size() const { return Queue.size(); }
+
     bool empty() const { return Queue.empty(); }

     void push(SUnit *U) {
@@ -456,16 +756,33 @@ namespace {
       return V;
     }

-    virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
-      return false;
+    /// remove - This is a really inefficient way to remove a node from a
+    /// priority queue. We should roll our own heap to make this better or
+    /// something.
+    void remove(SUnit *SU) {
+      std::vector<SUnit*> Temp;
+
+      assert(!Queue.empty() && "Not in queue!");
+      while (Queue.top() != SU) {
+        Temp.push_back(Queue.top());
+        Queue.pop();
+        assert(!Queue.empty() && "Not in queue!");
+      }
+
+      // Remove the node from the PQ.
+      Queue.pop();
+
+      // Add all the other nodes back.
+      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
+        Queue.push(Temp[i]);
     }
   };

   template<class SF>
   class VISIBILITY_HIDDEN BURegReductionPriorityQueue
    : public RegReductionPriorityQueue<SF> {
-    // SUnitMap SDNode to SUnit mapping (n -> 1).
-    DenseMap<SDNode*, SUnit*> *SUnitMap;
+    // SUnitMap - SDNode to SUnit mapping (n -> n).
+    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

     // SUnits - The SUnits for the current graph.
     const std::vector<SUnit> *SUnits;
@@ -478,7 +795,7 @@ namespace {
     explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii)
       : TII(tii) {}

-    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                    std::vector<SUnit> &sunits) {
       SUnitMap = &sumap;
       SUnits = &sunits;
@@ -488,6 +805,16 @@ namespace {
       CalculateSethiUllmanNumbers();
     }

+    void addNode(const SUnit *SU) {
+      SethiUllmanNumbers.resize(SUnits->size(), 0);
+      CalcNodeSethiUllmanNumber(SU);
+    }
+
+    void updateNode(const SUnit *SU) {
+      SethiUllmanNumbers[SU->NodeNum] = 0;
+      CalcNodeSethiUllmanNumber(SU);
+    }
+
     void releaseState() {
       SUnits = 0;
       SethiUllmanNumbers.clear();
@@ -519,18 +846,6 @@ namespace {
       return SethiUllmanNumbers[SU->NodeNum];
     }

-    bool isDUOperand(const SUnit *SU1, const SUnit *SU2) {
-      unsigned Opc = SU1->Node->getTargetOpcode();
-      unsigned NumRes = TII->getNumDefs(Opc);
-      unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node);
-      for (unsigned i = 0; i != NumOps; ++i) {
-        if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1)
-          continue;
-        if (SU1->Node->getOperand(i).isOperand(SU2->Node))
-          return true;
-      }
-      return false;
-    }
   private:
     bool canClobber(SUnit *SU, SUnit *Op);
     void AddPseudoTwoAddrDeps();
@@ -542,8 +857,8 @@ namespace {
   template<class SF>
   class VISIBILITY_HIDDEN TDRegReductionPriorityQueue
    : public RegReductionPriorityQueue<SF> {
-    // SUnitMap SDNode to SUnit mapping (n -> 1).
-    DenseMap<SDNode*, SUnit*> *SUnitMap;
+    // SUnitMap - SDNode to SUnit mapping (n -> n).
+    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

     // SUnits - The SUnits for the current graph.
     const std::vector<SUnit> *SUnits;
@@ -554,7 +869,7 @@ namespace {
   public:
     TDRegReductionPriorityQueue() {}

-    void initNodes(DenseMap<SDNode*, SUnit*> &sumap,
+    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                    std::vector<SUnit> &sunits) {
       SUnitMap = &sumap;
       SUnits = &sunits;
@@ -562,6 +877,16 @@ namespace {
       CalculateSethiUllmanNumbers();
     }

+    void addNode(const SUnit *SU) {
+      SethiUllmanNumbers.resize(SUnits->size(), 0);
+      CalcNodeSethiUllmanNumber(SU);
+    }
+
+    void updateNode(const SUnit *SU) {
+      SethiUllmanNumbers[SU->NodeNum] = 0;
+      CalcNodeSethiUllmanNumber(SU);
+    }
+
     void releaseState() {
       SUnits = 0;
       SethiUllmanNumbers.clear();
@@ -710,7 +1035,7 @@ bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
     for (unsigned i = 0; i != NumOps; ++i) {
       if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
         SDNode *DU = SU->Node->getOperand(i).Val;
-        if (Op == (*SUnitMap)[DU])
+        if (Op == (*SUnitMap)[DU][SU->InstanceNo])
           return true;
       }
     }
@@ -740,23 +1065,25 @@ void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
     for (unsigned j = 0; j != NumOps; ++j) {
       if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
         SDNode *DU = SU->Node->getOperand(j).Val;
-        SUnit *DUSU = (*SUnitMap)[DU];
+        SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
         if (!DUSU) continue;
         for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
              I != E; ++I) {
           if (I->isCtrl) continue;
           SUnit *SuccSU = I->Dep;
-          if (SuccSU != SU &&
-              (!canClobber(SuccSU, DUSU) ||
-               (!SU->isCommutable && SuccSU->isCommutable))){
-            if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) {
-              DOUT << "Adding an edge from SU # " << SU->NodeNum
-                   << " to SU #" << SuccSU->NodeNum << "\n";
-              if (SU->addPred(SuccSU, true))
-                SU->NumChainPredsLeft++;
-              if (SuccSU->addSucc(SU, true))
-                SuccSU->NumChainSuccsLeft++;
-            }
+          // Don't constrain nodes with implicit defs. It can create cycles,
+          // plus it may increase register pressure.
+          if (SuccSU == SU || SuccSU->hasImplicitDefs)
+            continue;
+          // Be conservative. Ignore if nodes aren't at the same depth.
+          if (SuccSU->Depth != SU->Depth)
+            continue;
+          if ((!canClobber(SuccSU, DUSU) ||
+               (!SU->isCommutable && SuccSU->isCommutable)) &&
+              !isReachable(SuccSU, SU)) {
+            DOUT << "Adding an edge from SU # " << SU->NodeNum
+                 << " to SU #" << SuccSU->NodeNum << "\n";
+            SU->addPred(SuccSU, true, true);
           }
         }
       }
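
The loop above adds an artificial edge, flagged isSpecial, so that another use of a two-address operand is emitted before the instruction that clobbers the register holding that operand. The situation in miniature (a hypothetical Node type, not LLVM's SUnit):

    #include <cassert>
    #include <vector>

    // Miniature of AddPseudoTwoAddrDeps: A's value is read by B and by the
    // two-address node C, which overwrites the register holding that value.
    // The pass inserts a special chain edge making B a predecessor of C, so
    // any legal order schedules B before C and B still sees A's value.
    struct Node {
      std::vector<Node*> Preds;
      void addPred(Node *N) { Preds.push_back(N); }
      bool isPred(Node *N) const {
        for (Node *P : Preds)
          if (P == N) return true;
        return false;
      }
    };

    int main() {
      Node A, B, C;
      B.addPred(&A);      // B reads A's value
      C.addPred(&A);      // C reads it too and, being two-address, clobbers it
      C.addPred(&B);      // the pseudo two-address chain edge added above
      assert(C.isPred(&B));
    }
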
@@ -783,7 +1110,7 @@ CalcNodeSethiUllmanNumber(const SUnit *SU) {
       SethiUllmanNumber = PredSethiUllman;
       Extra = 0;
     } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
-      Extra++;
+      ++Extra;
   }

   SethiUllmanNumber += Extra;
@@ -813,7 +1140,7 @@ static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
          EE = SuccSU->Preds.end(); II != EE; ++II) {
       SUnit *PredSU = II->Dep;
       if (!PredSU->isScheduled)
-        Sum++;
+        ++Sum;
     }
   }

@@ -906,7 +1233,7 @@ CalcNodeSethiUllmanNumber(const SUnit *SU) {
       SethiUllmanNumber = PredSethiUllman;
       Extra = 0;
     } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
-      Extra++;
+      ++Extra;
   }

   SethiUllmanNumber += Extra;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
index 6014fe9..286ef1f 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSimple.cpp
@@ -697,9 +697,9 @@ void ScheduleDAGSimple::EmitAll() {
     NodeInfo *NI = Ordering[i];
     if (NI->isInGroup()) {
       NodeGroupIterator NGI(Ordering[i]);
-      while (NodeInfo *NI = NGI.next()) EmitNode(NI->Node, VRBaseMap);
+      while (NodeInfo *NI = NGI.next()) EmitNode(NI->Node, 0, VRBaseMap);
     } else {
-      EmitNode(NI->Node, VRBaseMap);
+      EmitNode(NI->Node, 0, VRBaseMap);
     }
   }
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 73902b8..734d0f2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -281,7 +281,7 @@ namespace llvm {
                                      GraphWriter<ScheduleDAG*> &GW) {
     GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
     if (G->DAG.getRoot().Val)
-      GW.emitEdge(0, -1, G->SUnitMap[G->DAG.getRoot().Val], -1, "");
+      GW.emitEdge(0, -1, G->SUnitMap[G->DAG.getRoot().Val].front(), -1, "");
   }
 };
 }
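
The Sethi-Ullman numbers that addNode/updateNode maintain incrementally estimate how many registers evaluating a subtree needs: operands with equal demand force both results to be live at once. A worked miniature of the classic recurrence that CalcNodeSethiUllmanNumber implements (a simplified expression tree with no chain edges):

    #include <cstdio>
    #include <vector>

    // Classic Sethi-Ullman: a leaf needs one register; an interior node needs
    // the max over its children, plus one extra register for each additional
    // child that ties that max (both subtree results must be held at once).
    // This is the per-SUnit quantity the queues above keep up to date.
    struct Expr { std::vector<Expr*> Kids; };

    unsigned sethiUllman(const Expr *E) {
      if (E->Kids.empty())
        return 1;
      unsigned Num = 0, Extra = 0;
      for (const Expr *K : E->Kids) {
        unsigned KN = sethiUllman(K);
        if (KN > Num) { Num = KN; Extra = 0; }
        else if (KN == Num) ++Extra;
      }
      return Num + Extra;
    }

    int main() {
      Expr L1, L2, Add;                        // (L1 + L2): two tied leaves
      Add.Kids.push_back(&L1);
      Add.Kids.push_back(&L2);
      std::printf("%u\n", sethiUllman(&Add));  // prints 2
    }
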