diff options
author | Guochun Shi <gshi1@uiuc.edu> | 2003-03-27 17:57:44 +0000 |
---|---|---|
committer | Guochun Shi <gshi1@uiuc.edu> | 2003-03-27 17:57:44 +0000 |
commit | f1c154f5e69fdd11426b4e2a5cdea98fcab1606b (patch) | |
tree | a92fed3ba423c4c5707e22978f4d6710931ad1fc /lib/CodeGen/ModuloScheduling | |
parent | c277eaa41eb2ed6ab70855e170c07a17059c2bf3 (diff) | |
download | external_llvm-f1c154f5e69fdd11426b4e2a5cdea98fcab1606b.zip external_llvm-f1c154f5e69fdd11426b4e2a5cdea98fcab1606b.tar.gz external_llvm-f1c154f5e69fdd11426b4e2a5cdea98fcab1606b.tar.bz2 |
*** empty log message ***
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@5755 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen/ModuloScheduling')
-rw-r--r-- | lib/CodeGen/ModuloScheduling/Makefile | 7 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp | 1313 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h | 347 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp | 899 | ||||
-rw-r--r-- | lib/CodeGen/ModuloScheduling/ModuloScheduling.h | 160 |
5 files changed, 2726 insertions, 0 deletions
diff --git a/lib/CodeGen/ModuloScheduling/Makefile b/lib/CodeGen/ModuloScheduling/Makefile new file mode 100644 index 0000000..adbc021 --- /dev/null +++ b/lib/CodeGen/ModuloScheduling/Makefile @@ -0,0 +1,7 @@ +LEVEL = ../../.. + +DIRS = + +LIBRARYNAME = modulosched + +include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp b/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp new file mode 100644 index 0000000..4e36b07 --- /dev/null +++ b/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.cpp @@ -0,0 +1,1313 @@ +#include <math.h> +#include "ModuloSchedGraph.h" +#include "llvm/CodeGen/InstrSelection.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/iOther.h" +#include "Support/StringExtras.h" +#include "Support/STLExtras.h" +#include <iostream> +#include <swig.h> +#include "llvm/iOperators.h" +#include "llvm/iOther.h" +#include "llvm/iPHINode.h" +#include "llvm/iTerminators.h" +#include "llvm/iMemory.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/MachineCodeForInstruction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/MachineSchedInfo.h" + +#define UNIDELAY 1 +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) < (b) ? (b) : (a)) + +using std::vector; +using std::pair; +//using std::hash_map; +using std::cerr; +extern std::ostream modSched_os; +extern ModuloSchedDebugLevel_t ModuloSchedDebugLevel; +//*********************** Internal Data Structures *************************/ + +// The following two types need to be classes, not typedefs, so we can use +// opaque declarations in SchedGraph.h +// +struct RefVec: public vector< pair<ModuloSchedGraphNode*, int> > { + typedef vector< pair<ModuloSchedGraphNode*, int> >:: iterator iterator; + typedef vector< pair<ModuloSchedGraphNode*, int> >::const_iterator const_iterator; +}; + +struct RegToRefVecMap: public hash_map<int, RefVec> { + typedef hash_map<int, RefVec>:: iterator iterator; + typedef hash_map<int, RefVec>::const_iterator const_iterator; +}; + +struct ValueToDefVecMap: public hash_map<const Instruction*, RefVec> { + typedef hash_map<const Instruction*, RefVec>:: iterator iterator; + typedef hash_map<const Instruction*, RefVec>::const_iterator const_iterator; +}; + + + +// class Modulo SchedGraphNode + +/*ctor*/ +ModuloSchedGraphNode::ModuloSchedGraphNode(unsigned int _nodeId, + const BasicBlock* _bb, + const Instruction* _inst, + int indexInBB, + const TargetMachine& target) + : + SchedGraphNodeCommon(_nodeId, _bb, indexInBB), + inst(_inst) +{ + if(inst) + { + //FIXME: find the latency + //currently setthe latency to zero + latency =0; + } + +} + +//class ModuloScheGraph + + +void +ModuloSchedGraph::addDummyEdges() +{ + assert(graphRoot->outEdges.size() == 0); + + for (const_iterator I=begin(); I != end(); ++I) + { + ModuloSchedGraphNode* node = (ModuloSchedGraphNode*)( (*I).second); + assert(node != graphRoot && node != graphLeaf); + if (node->beginInEdges() == node->endInEdges()) + (void) new SchedGraphEdge(graphRoot, node, SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + if (node->beginOutEdges() == node->endOutEdges()) + (void) new SchedGraphEdge(node, graphLeaf, SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep, 0); + } +} + +bool isDefinition(const Instruction* I) +{ + //if(TerminatorInst::classof(I)||FreeInst::classof(I) || StoreInst::classof(I) || CallInst::classof(I)) + if(!I->hasName()) + return false; + else + return true; +} + +void ModuloSchedGraph::addDefUseEdges(const BasicBlock* bb) +{ + //collect def instructions, store them in vector + const MachineInstrInfo& mii = target.getInstrInfo(); + + typedef std::vector<ModuloSchedGraphNode*> DefVec; + DefVec defVec; + + //find those def instructions + for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I !=E; I++) + if(I->getType() != Type::VoidTy) + { + defVec.push_back(this->getGraphNodeForInst(I)) ; + } + + for(unsigned int i=0; i < defVec.size();i++) + for(Value::use_const_iterator I=defVec[i]->getInst()->use_begin();I !=defVec[i]->getInst()->use_end() ;I++) + { + //for each use of a def, add a flow edge from the def instruction to the ref instruction + + + const Instruction* value=defVec[i]->getInst(); + Instruction *inst=(Instruction*)(*I); + ModuloSchedGraphNode* node=NULL; + + for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I !=E; I++) + if((const Instruction*)I == inst) + { + node=(*this)[inst]; + break; + } + + assert(inst != NULL &&" Use not an Instruction ?" ); + + if(node == NULL) //inst is not an instruction in this block + {} + else + { + //add a flow edge from the def instruction to the ref instruction + + //self loop will not happen in SSA form + assert(defVec[i] != node && "same node?"); + + + //this is a true dependence, so the delay is equal to the delay of the pred node. + int delay=0; + MachineCodeForInstruction& tempMvec= MachineCodeForInstruction::get(value); + for(unsigned j=0;j< tempMvec.size();j++) + { + MachineInstr* temp=tempMvec[j]; + //delay +=mii.minLatency(temp->getOpCode()); + delay = max(delay, mii.minLatency(temp->getOpCode())); + } + + SchedGraphEdge* trueEdge=new SchedGraphEdge(defVec[i],node,value, SchedGraphEdge::TrueDep,delay); + + //if the ref instruction is before the def instrution + //then the def instruction must be a phi instruction + //add an anti-dependence edge to from the ref instruction to the def instruction + if( node->getOrigIndexInBB() < defVec[i]->getOrigIndexInBB()) + { + assert(PHINode::classof(inst) && "the ref instruction befre def is not PHINode?"); + trueEdge->setIteDiff(1); + } + + } + + } +} + +void ModuloSchedGraph::addCDEdges(const BasicBlock* bb) +{ + //find the last instruction in the basic block + //see if it is an branch instruction. + //If yes, then add an edge from each node expcept the last node to the last node + + const Instruction* inst=&(bb->back()); + ModuloSchedGraphNode* lastNode=(*this)[inst]; + if( TerminatorInst::classof(inst)) + for(BasicBlock::const_iterator I=bb->begin(),E=bb->end(); I != E; I++) + { + if(inst != I) + { + ModuloSchedGraphNode* node = (*this)[I]; + //use latency of 0 + (void) new SchedGraphEdge(node,lastNode,SchedGraphEdge::CtrlDep, + SchedGraphEdge::NonDataDep,0); + } + + } + + +} + + + + +static const int SG_LOAD_REF = 0; +static const int SG_STORE_REF = 1; +static const int SG_CALL_REF = 2; + +static const unsigned int SG_DepOrderArray[][3] = { + { SchedGraphEdge::NonDataDep, + SchedGraphEdge::AntiDep, + SchedGraphEdge::AntiDep }, + { SchedGraphEdge::TrueDep, + SchedGraphEdge::OutputDep, + SchedGraphEdge::TrueDep | SchedGraphEdge::OutputDep }, + { SchedGraphEdge::TrueDep, + SchedGraphEdge::AntiDep | SchedGraphEdge::OutputDep, + SchedGraphEdge::TrueDep | SchedGraphEdge::AntiDep + | SchedGraphEdge::OutputDep } +}; + + +// Add a dependence edge between every pair of machine load/store/call +// instructions, where at least one is a store or a call. +// Use latency 1 just to ensure that memory operations are ordered; +// latency does not otherwise matter (true dependences enforce that). +// +void +ModuloSchedGraph::addMemEdges(const BasicBlock* bb) +{ + + vector<ModuloSchedGraphNode*> memNodeVec; + + //construct the memNodeVec + + for( BasicBlock::const_iterator I=bb->begin(), E=bb->end();I != E; I++) + if(LoadInst::classof(I) ||StoreInst::classof(I) || CallInst::classof(I)) + { + ModuloSchedGraphNode* node =(*this)[(const Instruction*)I]; + memNodeVec.push_back(node); + } + // Instructions in memNodeVec are in execution order within the basic block, + // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>. + // + for (unsigned im=0, NM=memNodeVec.size(); im < NM; im++) + { + const Instruction* fromInst = memNodeVec[im]->getInst(); + int fromType = CallInst::classof(fromInst)? SG_CALL_REF + : LoadInst::classof(fromInst)? SG_LOAD_REF + : SG_STORE_REF; + for (unsigned jm=im+1; jm < NM; jm++) + { + const Instruction* toInst=memNodeVec[jm]->getInst(); + int toType = CallInst::classof(toInst)? SG_CALL_REF + : LoadInst::classof(toInst)? SG_LOAD_REF + : SG_STORE_REF; + if (fromType != SG_LOAD_REF || toType != SG_LOAD_REF) + { + (void) new SchedGraphEdge(memNodeVec[im], memNodeVec[jm], + SchedGraphEdge::MemoryDep, + SG_DepOrderArray[fromType][toType], 1); + + SchedGraphEdge* edge=new SchedGraphEdge(memNodeVec[jm],memNodeVec[im], + SchedGraphEdge::MemoryDep, + SG_DepOrderArray[toType][fromType],1); + edge->setIteDiff(1); + + } + } + } +} + + + +void ModuloSchedGraph::buildNodesforBB (const TargetMachine& target, + const BasicBlock* bb, + std::vector<ModuloSchedGraphNode*>& memNode, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap) +{ + //const MachineInstrInfo& mii=target.getInstrInfo(); + + //Build graph nodes for each LLVM instruction and gather def/use info. + //Do both together in a single pass over all machine instructions. + + int i=0; + for(BasicBlock::const_iterator I=bb->begin(), E=bb->end(); I!=E; I++) + { + ModuloSchedGraphNode* node=new ModuloSchedGraphNode(getNumNodes(), bb,I,i, target); + i++; + this->noteModuloSchedGraphNodeForInst(I,node); + } + + //this function finds some info about instruction in basic block for later use + //findDefUseInfoAtInstr(target, node, memNode,regToRefVecMap,valueToDefVecMap); + + +} + + +bool ModuloSchedGraph::isLoop(const BasicBlock* bb) +{ + //only if the last instruction in the basicblock is branch instruction and + //there is at least an option to branch itself + + + const Instruction* inst=&(bb->back()); + if( BranchInst::classof(inst)) + for(unsigned i=0;i < ((BranchInst*)inst)->getNumSuccessors();i++) + { + BasicBlock* sb=((BranchInst*)inst)->getSuccessor(i); + if(sb ==bb) return true; + } + + return false; + +} +bool ModuloSchedGraph::isLoop() +{ + //only if the last instruction in the basicblock is branch instruction and + //there is at least an option to branch itself + + assert(bbVec.size() ==1 &&"only 1 basicblock in a graph"); + const BasicBlock* bb=bbVec[0]; + const Instruction* inst=&(bb->back()); + if( BranchInst::classof(inst)) + for(unsigned i=0;i < ((BranchInst*)inst)->getNumSuccessors();i++) + { + BasicBlock* sb=((BranchInst*)inst)->getSuccessor(i); + if(sb ==bb) return true; + } + + return false; + +} + +void ModuloSchedGraph::computeNodeASAP(const BasicBlock* bb) +{ + + //FIXME: now assume the only backward edges come from the edges from other nodes to the phi Node + //so i will ignore all edges to the phi node; after this, there shall be no recurrence. + + unsigned numNodes=bb->size(); + for(unsigned i=2;i < numNodes+2;i++) + { + ModuloSchedGraphNode* node=getNode(i); + node->setPropertyComputed(false); + } + + for(unsigned i=2;i < numNodes+2; i++) + { + ModuloSchedGraphNode* node=getNode(i); + node->ASAP=0; + if(i==2 ||node->getNumInEdges() ==0) + { node->setPropertyComputed(true);continue;} + for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges();I !=E; I++) + { + SchedGraphEdge* edge=*I; + ModuloSchedGraphNode* pred=(ModuloSchedGraphNode*)(edge->getSrc()); + assert(pred->getPropertyComputed()&&"pred node property is not computed!"); + int temp=pred->ASAP + edge->getMinDelay() - edge->getIteDiff()*this->MII; + node->ASAP= max(node->ASAP,temp); + } + node->setPropertyComputed(true); + } +} + +void ModuloSchedGraph::computeNodeALAP(const BasicBlock* bb) +{ + + unsigned numNodes=bb->size(); + int maxASAP=0; + for(unsigned i=numNodes+1;i >= 2;i--) + { + ModuloSchedGraphNode* node=getNode(i); + node->setPropertyComputed(false); + //cerr<< " maxASAP= " <<maxASAP<<" node->ASAP= "<< node->ASAP<<"\n"; + maxASAP=max(maxASAP,node->ASAP); + } + + //cerr<<"maxASAP is "<<maxASAP<<"\n"; + + for(unsigned i=numNodes+1;i >= 2; i--) + { + ModuloSchedGraphNode* node=getNode(i); + node->ALAP=maxASAP; + for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I!=E; I++) + { + SchedGraphEdge* edge= *I; + ModuloSchedGraphNode* succ=(ModuloSchedGraphNode*) (edge->getSink()); + if(PHINode::classof(succ->getInst()))continue; + assert(succ->getPropertyComputed()&&"succ node property is not computed!"); + int temp=succ->ALAP - edge->getMinDelay()+edge->getIteDiff()*this->MII; + node->ALAP =min(node->ALAP,temp); + } + node->setPropertyComputed(true); + + } +} + +void ModuloSchedGraph::computeNodeMov(const BasicBlock* bb) +{ + unsigned numNodes=bb->size(); + for(unsigned i =2;i < numNodes +2 ;i++) + { + ModuloSchedGraphNode* node=getNode(i); + node->mov=node->ALAP-node->ASAP; + assert(node->mov >=0 &&"move freedom for this node is less than zero? "); + } +} + + +void ModuloSchedGraph::computeNodeDepth(const BasicBlock* bb) +{ + + unsigned numNodes=bb->size(); + for(unsigned i=2;i < numNodes+2;i++) + { + ModuloSchedGraphNode* node=getNode(i); + node->setPropertyComputed(false); + } + + for(unsigned i=2;i < numNodes+2; i++) + { + ModuloSchedGraphNode* node=getNode(i); + node->depth=0; + if(i==2 ||node->getNumInEdges() ==0) + { node->setPropertyComputed(true);continue;} + for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges();I !=E; I++) + { + SchedGraphEdge* edge=*I; + ModuloSchedGraphNode* pred=(ModuloSchedGraphNode*)(edge->getSrc()); + assert(pred->getPropertyComputed()&&"pred node property is not computed!"); + int temp=pred->depth + edge->getMinDelay(); + node->depth= max(node->depth,temp); + } + node->setPropertyComputed(true); + } + +} + + +void ModuloSchedGraph::computeNodeHeight(const BasicBlock* bb) +{ + unsigned numNodes=bb->size(); + for(unsigned i=numNodes+1;i >= 2;i--) + { + ModuloSchedGraphNode* node=getNode(i); + node->setPropertyComputed(false); + } + + for(unsigned i=numNodes+1;i >= 2; i--) + { + ModuloSchedGraphNode* node=getNode(i); + node->height=0; + for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I!=E; I++) + { + SchedGraphEdge* edge= *I; + ModuloSchedGraphNode* succ=(ModuloSchedGraphNode*)(edge->getSink()); + if(PHINode::classof(succ->getInst())) continue; + assert(succ->getPropertyComputed()&&"succ node property is not computed!"); + node->height=max(node->height, succ->height+edge->getMinDelay()); + + } + node->setPropertyComputed(true); + + } + +} + +void ModuloSchedGraph::computeNodeProperty(const BasicBlock* bb) +{ + //FIXME: now assume the only backward edges come from the edges from other nodes to the phi Node + //so i will ignore all edges to the phi node; after this, there shall be no recurrence. + + this->computeNodeASAP(bb); + this->computeNodeALAP(bb); + this->computeNodeMov(bb); + this->computeNodeDepth(bb); + this->computeNodeHeight(bb); +} + + +//do not consider the backedge in these two functions: +// i.e. don't consider the edge with destination in phi node +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(std::vector<ModuloSchedGraphNode*> set, unsigned backEdgeSrc, unsigned backEdgeSink) +{ + std::vector<ModuloSchedGraphNode*> predS; + for(unsigned i=0; i < set.size(); i++) + { + ModuloSchedGraphNode* node = set[i]; + for(ModuloSchedGraphNode::const_iterator I=node->beginInEdges(), E=node->endInEdges(); I !=E; I++) + { + SchedGraphEdge* edge= *I; + if(edge->getSrc()->getNodeId() ==backEdgeSrc && edge->getSink()->getNodeId() == backEdgeSink) continue; + ModuloSchedGraphNode* pred= (ModuloSchedGraphNode*)(edge->getSrc()); + //if pred is not in the predSet, push it in vector + bool alreadyInset=false; + for(unsigned j=0; j < predS.size(); j++) + if(predS[j]->getNodeId() == pred->getNodeId() ) + {alreadyInset=true;break;} + + for(unsigned j=0; j < set.size(); j++) + if(set[j]->getNodeId() == pred->getNodeId() ) + {alreadyInset=true; break;} + + if(! alreadyInset) + predS.push_back(pred); + } + } + return predS; +} + +ModuloSchedGraph::NodeVec ModuloSchedGraph::predSet(NodeVec set) +{ + //node number increases from 2 + return predSet(set,0, 0); +} + + +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(ModuloSchedGraphNode* _node, unsigned backEdgeSrc, unsigned backEdgeSink) +{ + + std::vector<ModuloSchedGraphNode*> set; + set.push_back(_node); + return predSet(set, backEdgeSrc, backEdgeSink); + +} + +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::predSet(ModuloSchedGraphNode* _node) +{ + return predSet(_node,0,0); +} + +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(std::vector<ModuloSchedGraphNode*> set,unsigned src, unsigned sink) +{ + vector<ModuloSchedGraphNode*> succS; + for(unsigned i=0; i < set.size(); i++) + { + ModuloSchedGraphNode* node = set[i]; + for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges(); I !=E; I++) + { + SchedGraphEdge* edge= *I; + if(edge->getSrc()->getNodeId() == src && edge->getSink()->getNodeId() == sink) continue; + ModuloSchedGraphNode* succ= (ModuloSchedGraphNode*)(edge->getSink()); + //if pred is not in the predSet, push it in vector + bool alreadyInset=false; + for(unsigned j=0; j < succS.size(); j++) + if(succS[j]->getNodeId() == succ->getNodeId() ) + {alreadyInset=true;break;} + + for(unsigned j=0; j < set.size(); j++) + if(set[j]->getNodeId() == succ->getNodeId() ) + {alreadyInset=true;break;} + if(! alreadyInset) + succS.push_back(succ); + } + } + return succS; +} + +ModuloSchedGraph::NodeVec ModuloSchedGraph::succSet(NodeVec set) +{ + return succSet(set,0,0); +} + + +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(ModuloSchedGraphNode* _node,unsigned src, unsigned sink) +{ + std::vector<ModuloSchedGraphNode*> set; + set.push_back(_node); + return succSet(set, src, sink); + + +} + +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::succSet(ModuloSchedGraphNode* _node) +{ + return succSet(_node, 0, 0); +} + +SchedGraphEdge* ModuloSchedGraph::getMaxDelayEdge(unsigned srcId, unsigned sinkId) +{ + + ModuloSchedGraphNode* node =getNode(srcId); + SchedGraphEdge* maxDelayEdge=NULL; + int maxDelay=-1; + for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges();I !=E; I++) + { + SchedGraphEdge* edge= *I; + if(edge->getSink()->getNodeId() == sinkId) + if(edge->getMinDelay() > maxDelay){ + maxDelayEdge=edge; + maxDelay=edge->getMinDelay(); + } + } + assert(maxDelayEdge != NULL&&"no edge between the srcId and sinkId?"); + return maxDelayEdge; +} + +void ModuloSchedGraph::dumpCircuits() +{ + modSched_os<<"dumping circuits for graph: "<<"\n"; + int j=-1; + while(circuits[++j][0] !=0){ + int k=-1; + while(circuits[j][++k] !=0) + modSched_os<< circuits[j][k]<<"\t"; + modSched_os<<"\n"; + } +} + +void ModuloSchedGraph::dumpSet(std::vector<ModuloSchedGraphNode*> set) +{ + for(unsigned i=0;i < set.size() ; i++) + modSched_os<< set[i]->getNodeId()<<"\t"; + modSched_os<<"\n"; +} + +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::vectorUnion(std::vector<ModuloSchedGraphNode*> set1,std::vector<ModuloSchedGraphNode*> set2 ) +{ + std::vector<ModuloSchedGraphNode*> unionVec; + for(unsigned i=0;i<set1.size();i++) + unionVec.push_back(set1[i]); + for(unsigned j=0;j < set2.size();j++){ + bool inset=false; + for(unsigned i=0;i < unionVec.size();i++) + if(set2[j]==unionVec[i]) inset=true; + if(!inset)unionVec.push_back(set2[j]); + } + return unionVec; +} +std::vector<ModuloSchedGraphNode*> ModuloSchedGraph::vectorConj(std::vector<ModuloSchedGraphNode*> set1,std::vector<ModuloSchedGraphNode*> set2 ) +{ + std::vector<ModuloSchedGraphNode*> conjVec; + for(unsigned i=0;i<set1.size();i++) + for(unsigned j=0;j < set2.size();j++) + if(set1[i]==set2[j]) conjVec.push_back(set1[i]); + return conjVec; +} + +ModuloSchedGraph::NodeVec ModuloSchedGraph::vectorSub(NodeVec set1, NodeVec set2) +{ + NodeVec newVec; + for(NodeVec::iterator I=set1.begin(); I != set1.end(); I++){ + bool inset=false; + for(NodeVec::iterator II=set2.begin(); II!=set2.end(); II++) + if( (*I)->getNodeId() ==(*II)->getNodeId()) + {inset=true;break;} + if(!inset) newVec.push_back(*I); + } + return newVec; +} + +void ModuloSchedGraph::orderNodes(){ + oNodes.clear(); + + std::vector<ModuloSchedGraphNode*> set; + const BasicBlock* bb=bbVec[0]; + unsigned numNodes = bb->size(); + + //first order all the sets + int j=-1; + int totalDelay=-1; + int preDelay=-1; + while(circuits[++j][0] !=0){ + int k=-1; + preDelay=totalDelay; + + while(circuits[j][++k] !=0){ + ModuloSchedGraphNode* node =getNode(circuits[j][k]); + unsigned nextNodeId; + nextNodeId =circuits[j][k+1] !=0? circuits[j][k+1]:circuits[j][0]; + SchedGraphEdge* edge = getMaxDelayEdge(circuits[j][k], nextNodeId); + totalDelay += edge->getMinDelay(); + } + if(preDelay != -1 && totalDelay > preDelay){ + //swap circuits[j][] and cuicuits[j-1][] + unsigned temp[MAXNODE]; + for(int k=0;k<MAXNODE;k++){ + temp[k]=circuits[j-1][k]; + circuits[j-1][k]=circuits[j][k]; + circuits[j][k]=temp[k]; + } + //restart + j=-1; + } + } + + //build the first set + int backEdgeSrc; + int backEdgeSink; + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"building the first set"<<"\n"; + int setSeq=-1; + int k=-1; + setSeq++; + while(circuits[setSeq][++k]!=0) + set.push_back(getNode(circuits[setSeq][k])); + if(circuits[setSeq][0]!=0){ + backEdgeSrc=circuits[setSeq][k-1]; + backEdgeSink=circuits[setSeq][0]; + } + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ + modSched_os<<"the first set is:"; + dumpSet(set); + } + + //implement the ordering algorithm + enum OrderSeq{ bottom_up, top_down}; + OrderSeq order; + std::vector<ModuloSchedGraphNode*> R; + while(!set.empty()) + { + std::vector<ModuloSchedGraphNode*> pset=predSet(oNodes); + std::vector<ModuloSchedGraphNode*> sset=succSet(oNodes); + + if(!pset.empty() && !vectorConj(pset,set).empty()) + {R=vectorConj(pset,set);order=bottom_up;} + else if( !sset.empty() && !vectorConj(sset,set).empty()) + {R=vectorConj(sset,set);order=top_down;} + else + { + int maxASAP=-1; + int position=-1; + for(unsigned i=0;i<set.size();i++){ + int temp= set[i]->getASAP(); + if(temp>maxASAP ) { + maxASAP=temp; + position=i; + } + } + R.push_back(set[position]); + order=bottom_up; + } + + while(!R.empty()){ + if(order== top_down){ + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"in top_down round"<<"\n"; + while(!R.empty()){ + int maxHeight=-1; + NodeVec::iterator chosenI; + for(NodeVec::iterator I=R.begin();I != R.end();I++){ + int temp=(*I)->height; + if( (temp > maxHeight) || ( temp == maxHeight && (*I)->mov <= (*chosenI)->mov ) ){ + + if((temp > maxHeight) || ( temp == maxHeight && (*I)->mov < (*chosenI)->mov )){ + maxHeight=temp; + chosenI=I; + continue; + } + //possible case: instruction A and B has the same height and mov, but A has dependence to B + //e.g B is the branch instruction in the end, or A is the phi instruction at the beginning + if((*I)->mov == (*chosenI)->mov) + for(ModuloSchedGraphNode::const_iterator oe=(*I)->beginOutEdges(), end=(*I)->endOutEdges();oe !=end; oe++){ + if((*oe)->getSink() == (*chosenI)){ + maxHeight=temp; + chosenI=I; + continue; + } + } + } + } + ModuloSchedGraphNode* mu= *chosenI; + oNodes.push_back(mu); + R.erase(chosenI); + std::vector<ModuloSchedGraphNode*> succ_mu= succSet(mu,backEdgeSrc,backEdgeSink); + std::vector<ModuloSchedGraphNode*> comm=vectorConj(succ_mu,set); + comm=vectorSub(comm,oNodes); + R = vectorUnion(comm, R); + } + order=bottom_up; + R= vectorConj(predSet(oNodes), set); + } else{ + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"in bottom up round"<<"\n"; + while(!R.empty()){ + int maxDepth=-1; + NodeVec::iterator chosenI; + for(NodeVec::iterator I=R.begin();I != R.end();I++){ + int temp=(*I)->depth; + if( (temp > maxDepth) || ( temp == maxDepth && (*I)->mov < (*chosenI)->mov )){ + maxDepth=temp; + chosenI=I; + } + } + ModuloSchedGraphNode* mu=*chosenI; + oNodes.push_back(mu); + R.erase(chosenI); + std::vector<ModuloSchedGraphNode*> pred_mu= predSet(mu,backEdgeSrc,backEdgeSink); + std::vector<ModuloSchedGraphNode*> comm=vectorConj(pred_mu,set); + comm=vectorSub(comm,oNodes); + R = vectorUnion(comm, R); + } + order=top_down; + R= vectorConj(succSet(oNodes), set); + } + } + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ + modSched_os<<"order finished"<<"\n"; + modSched_os<<"dumping the ordered nodes: "<<"\n"; + dumpSet(oNodes); + dumpCircuits(); + } + //create a new set + //FIXME: the nodes between onodes and this circuit should also be include in this set + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"building the next set"<<"\n"; + set.clear(); + int k=-1; + setSeq++; + while(circuits[setSeq][++k]!=0) + set.push_back(getNode(circuits[setSeq][k])); + if(circuits[setSeq][0]!=0){ + backEdgeSrc=circuits[setSeq][k-1]; + backEdgeSink=circuits[setSeq][0]; + } + if(set.empty()){ + //no circuits any more + //collect all other nodes + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"no circuits any more, collect the rest nodes"<<"\n"; + for(unsigned i=2;i<numNodes+2;i++){ + bool inset=false; + for(unsigned j=0;j< oNodes.size();j++) + if(oNodes[j]->getNodeId() == i) + {inset=true;break;} + if(!inset)set.push_back(getNode(i)); + } + } + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ + modSched_os<<"next set is: "<<"\n"; + dumpSet(set); + } + }//while(!set.empty()) + +} + + + + + + +void ModuloSchedGraph::buildGraph (const TargetMachine& target) +{ + const BasicBlock* bb=bbVec[0]; + + assert(bbVec.size() ==1 &&"only handling a single basic block here"); + + // Use this data structure to note all machine operands that compute + // ordinary LLVM values. These must be computed defs (i.e., instructions). + // Note that there may be multiple machine instructions that define + // each Value. + ValueToDefVecMap valueToDefVecMap; + + // Use this data structure to note all memory instructions. + // We use this to add memory dependence edges without a second full walk. + // + // vector<const Instruction*> memVec; + vector<ModuloSchedGraphNode*> memNodeVec; + + // Use this data structure to note any uses or definitions of + // machine registers so we can add edges for those later without + // extra passes over the nodes. + // The vector holds an ordered list of references to the machine reg, + // ordered according to control-flow order. This only works for a + // single basic block, hence the assertion. Each reference is identified + // by the pair: <node, operand-number>. + // + RegToRefVecMap regToRefVecMap; + + + + // Make a dummy root node. We'll add edges to the real roots later. + graphRoot = new ModuloSchedGraphNode(0, NULL, NULL, -1,target); + graphLeaf = new ModuloSchedGraphNode(1, NULL, NULL, -1,target); + + //---------------------------------------------------------------- + // First add nodes for all the LLVM instructions in the basic block + // because this greatly simplifies identifying which edges to add. + // Do this one VM instruction at a time since the ModuloSchedGraphNode needs that. + // Also, remember the load/store instructions to add memory deps later. + //---------------------------------------------------------------- + + //FIXME:if there is call instruction, then we shall quit somewhere + // currently we leave call instruction and continue construct graph + + //dump only the blocks which are from loops + + + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + this->dump(bb); + + if(!isLoop(bb)){ + modSched_os <<" dumping non-loop BB:"<<endl; + dump(bb); + } + if( isLoop(bb)) + { + buildNodesforBB(target, bb, memNodeVec, regToRefVecMap, valueToDefVecMap); + + this->addDefUseEdges(bb); + + this->addCDEdges(bb); + + this->addMemEdges(bb); + + //this->dump(); + + int ResII=this->computeResII(bb); + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os << "ResII is " << ResII<<"\n";; + int RecII=this->computeRecII(bb); + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os << "RecII is " <<RecII<<"\n"; + + this->MII=max(ResII, RecII); + + this->computeNodeProperty(bb); + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + this->dumpNodeProperty(); + + this->orderNodes(); + + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + this->dump(); + //this->instrScheduling(); + + //this->dumpScheduling(); + + + } +} + +ModuloSchedGraphNode* ModuloSchedGraph::getNode (const unsigned nodeId) const +{ + for(const_iterator I=begin(), E=end();I !=E;I++) + if((*I).second->getNodeId() ==nodeId) + return (ModuloSchedGraphNode*)(*I).second; + return NULL; +} + +int ModuloSchedGraph::computeRecII(const BasicBlock* bb) +{ + + int RecII=0; + + //os<<"begining computerRecII()"<<"\n"; + + + //FIXME: only deal with circuits starting at the first node: the phi node nodeId=2; + + //search all elementary circuits in the dependance graph + //assume maximum number of nodes is MAXNODE + + unsigned path[MAXNODE]; + unsigned stack[MAXNODE][MAXNODE]; + + + for(int j=0;j<MAXNODE;j++) + {path[j]=0; + for(int k=0;k<MAXNODE;k++) + stack[j][k]=0; + } + //in our graph, the node number starts at 2 + + //number of nodes, because each instruction will result in one node + const unsigned numNodes= bb->size(); + + int i=0; + path[i]=2; + + ModuloSchedGraphNode* initNode=getNode(path[0]); + unsigned initNodeId=initNode->getNodeId(); + ModuloSchedGraphNode* currentNode=initNode; + + while(currentNode != NULL) + { + unsigned currentNodeId=currentNode->getNodeId(); + // modSched_os<<"current node is "<<currentNodeId<<"\n"; + + ModuloSchedGraphNode* nextNode=NULL; + for(ModuloSchedGraphNode::const_iterator I=currentNode->beginOutEdges(), E=currentNode->endOutEdges(); I !=E; I++) + { + //modSched_os <<" searching in outgoint edges of node "<<currentNodeId<<"\n"; + unsigned nodeId=((SchedGraphEdge*) *I)->getSink()->getNodeId(); + bool inpath=false,instack=false; + int k; + + //modSched_os<<"nodeId is "<<nodeId<<"\n"; + + k=-1; + while(path[++k] !=0) + if(nodeId == path[k]) + {inpath=true;break;} + + + + k=-1; + while(stack[i][++k] !=0 ) + if(nodeId == stack[i][k]) + {instack =true; break;} + + + if( nodeId > currentNodeId && !inpath && !instack) + {nextNode=(ModuloSchedGraphNode*) ((SchedGraphEdge*)*I)->getSink();break;} + + } + if(nextNode != NULL) + { + //modSched_os<<"find the next Node "<<nextNode->getNodeId()<<"\n"; + + + int j=0; + while( stack[i][j] !=0) j++; + stack[i][j]=nextNode->getNodeId(); + + + i++; + path[i]=nextNode->getNodeId(); + currentNode=nextNode; + } + else + { + //modSched_os<<"no expansion any more"<<"\n"; + //confirmCircuit(); + for(ModuloSchedGraphNode::const_iterator I=currentNode->beginOutEdges(), E=currentNode->endOutEdges() ; I != E; I++) + { + unsigned nodeId=((SchedGraphEdge*)*I)->getSink()->getNodeId(); + if(nodeId == initNodeId) + { + + int j=-1; + while(circuits[++j][0] !=0); + for(int k=0;k<MAXNODE;k++)circuits[j][k]=path[k]; + + } + } + //remove this node in the path and clear the corresponding entries in the stack + path[i]=0; + int j=0; + for(j=0;j<MAXNODE;j++)stack[i][j]=0; + i--; + currentNode=getNode(path[i]); + } + if(i==0) + { + + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"circuits found are: "<<"\n"; + int j=-1; + while(circuits[++j][0] !=0){ + int k=-1; + while(circuits[j][++k] !=0) + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<< circuits[j][k]<<"\t"; + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"\n"; + + + //for this circuit, compute the sum of all edge delay + int sumDelay=0; + k=-1; + while(circuits[j][++k] !=0) + { + //ModuloSchedGraphNode* node =getNode(circuits[j][k]); + unsigned nextNodeId; + nextNodeId =circuits[j][k+1] !=0? circuits[j][k+1]:circuits[j][0]; + + /* + for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(), E=node->endOutEdges();I !=E; I++) + { + SchedGraphEdge* edge= *I; + if(edge->getSink()->getNodeId() == nextNodeId) + {sumDelay += edge->getMinDelay();break;} + } + */ + + sumDelay += getMaxDelayEdge(circuits[j][k],nextNodeId)->getMinDelay(); + + } + // assume we have distance 1, in this case the sumDelay is RecII + // this is correct for SSA form only + // + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"The total Delay in the circuit is " <<sumDelay<<"\n"; + + RecII= RecII > sumDelay? RecII: sumDelay; + + } + return RecII; + } + + } + + return -1; +} + +void ModuloSchedGraph::addResourceUsage(std::vector<pair<int,int> >& ruVec, int rid){ + //modSched_os<<"\nadding a resouce , current resouceUsage vector size is "<<ruVec.size()<<"\n"; + bool alreadyExists=false; + for(unsigned i=0;i <ruVec.size() ; i++){ + if(rid == ruVec[i].first) { + ruVec[i].second++; + alreadyExists=true; + break; + } + } + if( !alreadyExists) ruVec.push_back(std::make_pair(rid, 1)); + //modSched_os<<"current resouceUsage vector size is "<<ruVec.size()<<"\n"; + +} +void ModuloSchedGraph::dumpResourceUsage(std::vector< pair<int,int> > &ru) +{ + MachineSchedInfo& msi = (MachineSchedInfo&)target.getSchedInfo(); + + std::vector<pair<int,int> > resourceNumVector = msi.resourceNumVector; + modSched_os <<"resourceID\t"<<"resourceNum"<<"\n"; + for(unsigned i=0;i<resourceNumVector.size();i++) + modSched_os <<resourceNumVector[i].first<<"\t"<<resourceNumVector[i].second<<"\n"; + + modSched_os <<" maxNumIssueTotal(issue slot in one cycle) = "<<msi.maxNumIssueTotal<<"\n"; + modSched_os<<"resourceID\t resourceUsage\t ResourceNum"<<"\n"; + for(unsigned i=0;i<ru.size();i++){ + modSched_os<<ru[i].first<<"\t"<<ru[i].second; + const unsigned resNum=msi.getCPUResourceNum(ru[i].first); + modSched_os<<"\t"<<resNum<<"\n"; + } +} + +int ModuloSchedGraph::computeResII(const BasicBlock* bb) +{ + + const MachineInstrInfo& mii = target.getInstrInfo(); + const MachineSchedInfo& msi = target.getSchedInfo(); + + int ResII; + std::vector<pair<int,int> > resourceUsage; //pair<int resourceid, int resourceUsageTimes_in_the_whole_block> + + //FIXME: number of functional units the target machine can provide should be get from Target + // here I temporary hardcode it + + for(BasicBlock::const_iterator I=bb->begin(),E=bb->end(); I !=E; I++) + { + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ + modSched_os<<"machine instruction for llvm instruction( node "<<getGraphNodeForInst(I)->getNodeId()<<")" <<"\n"; + modSched_os<<"\t"<<*I; + } + MachineCodeForInstruction& tempMvec= MachineCodeForInstruction::get(I); + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os <<"size =" <<tempMvec.size()<<"\n"; + for(unsigned i=0;i< tempMvec.size();i++) + { + MachineInstr* minstr=tempMvec[i]; + + unsigned minDelay=mii.minLatency(minstr->getOpCode()); + InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode()); + InstrClassRUsage classRUsage=msi.getClassRUsage(mii.getSchedClass(minstr->getOpCode())); + unsigned totCycles= classRUsage.totCycles; + + std::vector<std::vector<resourceId_t> > resources + =rUsage.resourcesByCycle; + assert(totCycles == resources.size()); + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os <<"resources Usage for this Instr(totCycles=" << totCycles << ",mindLatency="<<mii.minLatency(minstr->getOpCode())<<"): "<< *minstr <<"\n"; + for(unsigned j=0;j< resources.size();j++){ + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"cycle "<<j<<": "; + for(unsigned k=0;k< resources[j].size(); k++){ + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"\t"<<resources[j][k]; + addResourceUsage(resourceUsage,resources[j][k]); + } + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"\n"; + } + } + } + if(ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + this->dumpResourceUsage(resourceUsage); + + //compute ResII + ResII=0; + int issueSlots=msi.maxNumIssueTotal; + for(unsigned i=0;i<resourceUsage.size();i++){ + int resourceNum=msi.getCPUResourceNum(resourceUsage[i].first); + int useNum=resourceUsage[i].second; + double tempII; + if(resourceNum <= issueSlots) + tempII=ceil(1.0*useNum/resourceNum); + else + tempII=ceil(1.0*useNum/issueSlots); + ResII=max((int)tempII,ResII); + } + return ResII; +} + +ModuloSchedGraphSet::ModuloSchedGraphSet(const Function* function, const TargetMachine& target) + :method(function) +{ + buildGraphsForMethod(method,target); + +} + + +ModuloSchedGraphSet::~ModuloSchedGraphSet() +{ + //delete all the graphs + for(iterator I=begin(), E=end();I !=E; ++I) + delete *I; +} + +void ModuloSchedGraphSet::dump() const +{ + modSched_os << " ====== ModuloSched graphs for function `" <<method->getName() + << "' =========\n\n"; + for(const_iterator I=begin(); I != end(); ++I) + (*I)->dump(); + + modSched_os << "\n=========End graphs for funtion` "<<method->getName() + << "' ==========\n\n"; +} + +void ModuloSchedGraph::dump(const BasicBlock* bb) +{ + modSched_os<<"dumping basic block:"; + modSched_os<<(bb->hasName()?bb->getName(): "block") + <<" (" << bb << ")"<<"\n"; + +} + +void ModuloSchedGraph::dump(const BasicBlock* bb, std::ostream& os) +{ + os<<"dumping basic block:"; + os<<(bb->hasName()?bb->getName(): "block") + <<" (" << bb << ")"<<"\n"; +} + +void +ModuloSchedGraph::dump() const +{ + modSched_os << " ModuloSchedGraph for basic Blocks:"; + for(unsigned i=0, N =bbVec.size(); i< N; i++) + { + modSched_os<<(bbVec[i]->hasName()?bbVec[i]->getName(): "block") + <<" (" << bbVec[i] << ")" + << ((i==N-1)?"":", "); + } + + modSched_os <<"\n\n Actual Root nodes : "; + for (unsigned i=0, N=graphRoot->outEdges.size(); i < N; i++) + modSched_os << graphRoot->outEdges[i]->getSink()->getNodeId() + << ((i == N-1)? "" : ", "); + + modSched_os << "\n Graph Nodes:\n"; + //for (const_iterator I=begin(); I != end(); ++I) + //modSched_os << "\n" << *I->second; + unsigned numNodes=bbVec[0]->size(); + for(unsigned i=2;i< numNodes+2;i++) + { + ModuloSchedGraphNode* node= getNode(i); + modSched_os << "\n" << *node; + } + + modSched_os << "\n"; +} +void +ModuloSchedGraph::dumpNodeProperty() const +{ + const BasicBlock* bb=bbVec[0]; + unsigned numNodes=bb->size(); + for(unsigned i=2;i < numNodes+2;i++) + { + ModuloSchedGraphNode* node=getNode(i); + modSched_os<<"NodeId "<<node->getNodeId()<<"\t"; + modSched_os<<"ASAP "<<node->getASAP()<<"\t"; + modSched_os<<"ALAP "<<node->getALAP()<<"\t"; + modSched_os<<"mov " <<node->getMov() <<"\t"; + modSched_os<<"depth "<<node->getDepth()<<"\t"; + modSched_os<<"height "<<node->getHeight()<<"\t"; + modSched_os<<"\n"; + } +} + +void ModuloSchedGraphSet::buildGraphsForMethod (const Function *F, const TargetMachine& target) +{ + for(Function::const_iterator BI = F->begin(); BI != F->end(); ++BI) + addGraph(new ModuloSchedGraph(BI,target)); +} + +std::ostream &operator<<(std::ostream &os, const ModuloSchedGraphNode& node) +{ + os << std::string(8, ' ') + << "Node " << node.nodeId << " : " + << "latency = " << node.latency << "\n" << std::string(12, ' '); + + + + if (node.getInst() == NULL) + os << "(Dummy node)\n"; + else + { + os << *node.getInst() << "\n" << std::string(12, ' '); + os << node.inEdges.size() << " Incoming Edges:\n"; + for (unsigned i=0, N=node.inEdges.size(); i < N; i++) + os << std::string(16, ' ') << *node.inEdges[i]; + + os << std::string(12, ' ') << node.outEdges.size() + << " Outgoing Edges:\n"; + for (unsigned i=0, N=node.outEdges.size(); i < N; i++) + os << std::string(16, ' ') << *node.outEdges[i]; + } + + + return os; +} diff --git a/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h b/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h new file mode 100644 index 0000000..07fde3c --- /dev/null +++ b/lib/CodeGen/ModuloScheduling/ModuloSchedGraph.h @@ -0,0 +1,347 @@ +//===- ModuloSchedGraph.h - Represent a collection of data structures ----*- C++ -*-===// +// +// This header defines the primative classes that make up a data structure +// graph. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MODULO_SCHED_GRAPH_H +#define LLVM_CODEGEN_MODULO_SCHED_GRAPH_H + +#include "Support/HashExtras.h" +#include "Support/GraphTraits.h" +#include "../InstrSched/SchedGraphCommon.h" +#include "llvm/Instruction.h" +#include "llvm/Target/TargetMachine.h" +#include <iostream> +using std::pair; + +//for debug information selecton +enum ModuloSchedDebugLevel_t{ + ModuloSched_NoDebugInfo, + ModuloSched_Disable, + ModuloSched_PrintSchedule, + ModuloSched_PrintScheduleProcess, +}; + +//===============------------------------------------------------------------------------ +///ModuloSchedGraphNode - Implement a data structure based on the SchedGraphNodeCommon +///this class stores informtion needed later to order the nodes in modulo scheduling +/// +class ModuloSchedGraphNode: public SchedGraphNodeCommon { +private: + //the corresponding instruction + const Instruction* inst; + + //whether this node's property(ASAP,ALAP, ...) has been computed + bool propertyComputed; + + //ASAP: the earliest time the node could be scheduled + //ALAP: the latest time the node couldbe scheduled + //depth: the depth of the node + //height: the height of the node + //mov: the mobility function, computed as ALAP - ASAP + //scheTime: the scheduled time if this node has been scheduled + //earlyStart: the earliest time to be tried to schedule the node + //lateStart: the latest time to be tried to schedule the node + int ASAP, ALAP, depth, height, mov; + int schTime; + int earlyStart,lateStart; + +public: + + //get the instruction + const Instruction* getInst() const { return inst;} + + //get the instruction op-code name + const char* getInstOpcodeName() const{ return inst->getOpcodeName();} + + //get the instruction op-code + const unsigned getInstOpcode() const { return inst->getOpcode();} + + //return whether the node is NULL + bool isNullNode() const{ return(inst== NULL);} + + //return whether the property of the node has been computed + bool getPropertyComputed() {return propertyComputed;} + + //set the propertyComputed + void setPropertyComputed(bool _propertyComputed) {propertyComputed = _propertyComputed;} + + //get the corresponding property + int getASAP(){ return ASAP;} + int getALAP(){ return ALAP;} + int getMov() { return mov;} + int getDepth(){return depth;} + int getHeight(){return height;} + int getSchTime(){return schTime;} + int getEarlyStart(){return earlyStart;} + int getLateStart() { return lateStart;} + void setEarlyStart(int _earlyStart) {earlyStart= _earlyStart;} + void setLateStart(int _lateStart) {lateStart= _lateStart;} + void setSchTime(int _time){schTime=_time;} + + private: + friend class ModuloSchedGraph; + friend class SchedGraphNode; + + //constructor: + //nodeId: the node id, unique within the each BasicBlock + //_bb: which BasicBlock the corresponding instruction belongs to + //_inst: the corresponding instruction + //indexInBB: the corresponding instruction's index in the BasicBlock + //target: the targetMachine + ModuloSchedGraphNode(unsigned int _nodeId, + const BasicBlock* _bb, + const Instruction* _inst, + int indexInBB, + const TargetMachine& target); + + + friend std::ostream& operator<<(std::ostream& os,const ModuloSchedGraphNode& edge); + +}; + + + + + +//FIXME: these two value should not be used +#define MAXNODE 100 +#define MAXCC 100 + + + +//===----------------------------------------------------------------------===// +/// ModuloSchedGraph- the data structure to store dependence between nodes +/// it catches data dependence and constrol dependence +/// +/// + +class ModuloSchedGraph: + public SchedGraphCommon, + protected hash_map<const Instruction*, ModuloSchedGraphNode*> +{ + +private: + //iteration Interval + int MII; + + //target machine + const TargetMachine& target; + + //the circuits in the dependence graph + unsigned circuits[MAXCC][MAXNODE]; + + //the order nodes + std::vector<ModuloSchedGraphNode*> oNodes; + + typedef std::vector<ModuloSchedGraphNode*> NodeVec; + + //the function to compute properties + void computeNodeASAP(const BasicBlock* bb); + void computeNodeALAP(const BasicBlock* bb); + void computeNodeMov(const BasicBlock* bb); + void computeNodeDepth(const BasicBlock* bb); + void computeNodeHeight(const BasicBlock* bb); + + //the function to compute node property + void computeNodeProperty(const BasicBlock* bb); + + //the function to sort nodes + void orderNodes(); + + //add the resource usage + void addResourceUsage(std::vector<pair<int,int> >&, int); + + //debug functions: + //dump circuits + void dumpCircuits(); + //dump the input set of nodes + void dumpSet(std::vector<ModuloSchedGraphNode*> set); + //dump the input resource usage table + void dumpResourceUsage(std::vector<pair<int,int> >&); + + public: + //help functions + + //get the maxium the delay between two nodes + SchedGraphEdge* getMaxDelayEdge(unsigned srcId, unsigned sinkId); + + //FIXME: + //get the predessor Set of the set + NodeVec predSet(NodeVec set, unsigned, unsigned); + NodeVec predSet(NodeVec set); + + //get the predessor set of the node + NodeVec predSet(ModuloSchedGraphNode* node, unsigned,unsigned); + NodeVec predSet(ModuloSchedGraphNode* node); + + //get the successor set of the set + NodeVec succSet(NodeVec set, unsigned, unsigned); + NodeVec succSet(NodeVec set); + + //get the succssor set of the node + NodeVec succSet(ModuloSchedGraphNode* node,unsigned, unsigned); + NodeVec succSet(ModuloSchedGraphNode* node); + + //return the uniton of the two vectors + NodeVec vectorUnion(NodeVec set1,NodeVec set2 ); + + //return the consjuction of the two vectors + NodeVec vectorConj(NodeVec set1,NodeVec set2 ); + + //return all nodes in set1 but not set2 + NodeVec vectorSub(NodeVec set1, NodeVec set2); + + typedef hash_map<const Instruction*, ModuloSchedGraphNode*> map_base; +public: + using map_base::iterator; + using map_base::const_iterator; + +public: + + //get target machine + const TargetMachine& getTarget(){return target;} + + //get the iteration interval + const int getMII(){return MII;} + + //get the ordered nodes + const NodeVec& getONodes(){return oNodes;} + + //get the number of nodes (including the root and leaf) + //note: actually root and leaf is not used + const unsigned int getNumNodes() const {return size()+2;} + + //return wether the BasicBlock 'bb' contains a loop + bool isLoop (const BasicBlock* bb); + + //return this basibBlock contains a loop + bool isLoop (); + + + //return the node for the input instruction + ModuloSchedGraphNode* getGraphNodeForInst(const Instruction* inst) const{ + const_iterator onePair = this->find(inst); + return (onePair != this->end())?(*onePair).second: NULL; + } + + //Debugging support + //dump the graph + void dump() const; + //dump the basicBlock + void dump(const BasicBlock* bb); + //dump the basicBlock into 'os' stream + void dump(const BasicBlock* bb, std::ostream& os); + //dump the node property + void dumpNodeProperty() const ; + + private: + friend class ModuloSchedGraphSet; //give access to ctor + + public: + /*ctr*/ + ModuloSchedGraph(const BasicBlock* bb, const TargetMachine& _target) + :SchedGraphCommon(bb), target(_target){ + buildGraph(target); + } + + /*dtr*/ + ~ModuloSchedGraph(){ + for(const_iterator I=begin(); I!=end(); ++I) + delete I->second; + } + + //unorder iterators + //return values are pair<const Instruction*, ModuloSchedGraphNode*> + using map_base::begin; + using map_base::end; + + + + inline void noteModuloSchedGraphNodeForInst(const Instruction* inst, + ModuloSchedGraphNode* node) + { + assert((*this)[inst] ==NULL); + (*this)[inst]=node; + } + + //Graph builder + + ModuloSchedGraphNode* getNode (const unsigned nodeId) const; + + //build the graph from the basicBlock + void buildGraph (const TargetMachine& target); + + //Build nodes for BasicBlock + void buildNodesforBB (const TargetMachine& target, + const BasicBlock* bb, + NodeVec& memNode, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap); + + //find definitiona and use information for all nodes + void findDefUseInfoAtInstr (const TargetMachine& target, + ModuloSchedGraphNode* node, + NodeVec& memNode, + RegToRefVecMap& regToRefVecMap, + ValueToDefVecMap& valueToDefVecMap); + + //add def-use edge + void addDefUseEdges (const BasicBlock* bb); + + //add control dependence edges + void addCDEdges (const BasicBlock* bb); + + //add memory dependence dges + void addMemEdges (const BasicBlock* bb); + + //add dummy edges + void addDummyEdges(); + + //computer source restrictoin II + int computeResII (const BasicBlock* bb); + + //computer recurrence II + int computeRecII (const BasicBlock* bb); +}; + +///==================================- +//gragh set + +class ModuloSchedGraphSet: + public std::vector<ModuloSchedGraph*> +{ +private: + const Function* method; + +public: + typedef std::vector<ModuloSchedGraph*> baseVector; + using baseVector::iterator; + using baseVector::const_iterator; + +public: + /*ctor*/ ModuloSchedGraphSet (const Function* function, const TargetMachine& target); + + /*dtor*/ ~ModuloSchedGraphSet (); + + //iterators + using baseVector::begin; + using baseVector::end; + + + //Debugging support + void dump() const; + +private: + inline void addGraph(ModuloSchedGraph* graph){ + assert(graph !=NULL); + this->push_back(graph); + } + + //Graph builder + void buildGraphsForMethod (const Function *F, const TargetMachine& target); +}; + +#endif diff --git a/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp b/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp new file mode 100644 index 0000000..3f4002a --- /dev/null +++ b/lib/CodeGen/ModuloScheduling/ModuloScheduling.cpp @@ -0,0 +1,899 @@ + +//===- SPLInstrScheduling.cpp - Modulo Software Pipelining Instruction Scheduling support -------===// +// +// this file implements the llvm/CodeGen/ModuloScheduling.h interface +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineCodeForInstruction.h" +#include "llvm/CodeGen/MachineCodeForBasicBlock.h" +#include "llvm/CodeGen/MachineCodeForMethod.h" +#include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better +#include "llvm/Target/TargetMachine.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instruction.h" +#include "Support/CommandLine.h" +#include <algorithm> +#include "ModuloSchedGraph.h" +#include "ModuloScheduling.h" +#include "llvm/Target/MachineSchedInfo.h" +#include "llvm/BasicBlock.h" +#include "llvm/iTerminators.h" +#include "llvm/iPHINode.h" +#include "llvm/Constants.h" +#include <iostream> +#include <swig.h> +#include <fstream> +#include "llvm/CodeGen/InstrSelection.h" + +#define max(x,y) (x>y?x:y) +#define min(x,y) (x<y?x:y) +using std::cerr; +using std::cout; +using std::ostream; +using std::ios; +using std::filebuf; + +//************************************************************ +//printing Debug information +//ModuloSchedDebugLevel stores the value of debug level +// modsched_os is the ostream to dump debug information, which is written into the file 'moduloSchedDebugInfo.output' +//see ModuloSchedulingPass::runOnFunction() +//************************************************************ + +ModuloSchedDebugLevel_t ModuloSchedDebugLevel; +static cl::opt<ModuloSchedDebugLevel_t, true> +SDL_opt("modsched", cl::Hidden, cl::location(ModuloSchedDebugLevel), + cl::desc("enable modulo scheduling debugging information"), + cl::values( + clEnumValN(ModuloSched_NoDebugInfo, "n", "disable debug output"), + clEnumValN(ModuloSched_Disable, "off", "disable modulo scheduling"), + clEnumValN(ModuloSched_PrintSchedule, "psched", "print original and new schedule"), + clEnumValN(ModuloSched_PrintScheduleProcess,"pschedproc", "print how the new schdule is produced"), + 0)); + +filebuf modSched_fb; +ostream modSched_os(&modSched_fb); + +//************************************************************ + + +///the method to compute schedule and instert epilogue and prologue +void ModuloScheduling::instrScheduling(){ + + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"*************************computing modulo schedule ************************\n"; + + + const MachineSchedInfo& msi=target.getSchedInfo(); + + //number of issue slots in the in each cycle + int numIssueSlots=msi.maxNumIssueTotal; + + + + //compute the schedule + bool success=false; + while(!success) + { + //clear memory from the last round and initialize if necessary + clearInitMem(msi); + + //compute schedule and coreSchedule with the current II + success=computeSchedule(); + + if(!success){ + II++; + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"increase II to "<<II<<"\n"; + } + } + + //print the final schedule if necessary + if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule) + dumpScheduling(); + + + //the schedule has been computed + //create epilogue, prologue and kernel BasicBlock + + //find the successor for this BasicBlock + BasicBlock* succ_bb= getSuccBB(bb); + + //print the original BasicBlock if necessary + if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule){ + modSched_os<<"dumping the orginal block\n"; + graph.dump(bb); + } + + //construction of prologue, kernel and epilogue + BasicBlock* kernel=bb->splitBasicBlock(bb->begin()); + BasicBlock* prologue= bb; + BasicBlock* epilogue=kernel->splitBasicBlock(kernel->begin()); + + + //construct prologue + constructPrologue(prologue); + + //construct kernel + constructKernel(prologue,kernel,epilogue); + + //construct epilogue + constructEpilogue(epilogue,succ_bb); + + + //print the BasicBlocks if necessary + if( ModuloSchedDebugLevel >= ModuloSched_PrintSchedule){ + modSched_os<<"dumping the prologue block:\n"; + graph.dump(prologue); + modSched_os<<"dumping the kernel block\n"; + graph.dump(kernel); + modSched_os<<"dumping the epilogue block\n"; + graph.dump(epilogue); + } + +} + +//clear memory from the last round and initialize if necessary +void ModuloScheduling::clearInitMem(const MachineSchedInfo& msi){ + + + unsigned numIssueSlots = msi.maxNumIssueTotal; + //clear nodeScheduled from the last round + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ + modSched_os<< "***** new round with II= "<<II<<" *******************"<<endl; + modSched_os<< " **************clear the vector nodeScheduled**************** \n"; + } + nodeScheduled.clear(); + + + //clear resourceTable from the last round and reset it + resourceTable.clear(); + for(unsigned i=0;i< II;i++) + resourceTable.push_back(msi.resourceNumVector); + + + //clear the schdule and coreSchedule from the last round + schedule.clear(); + coreSchedule.clear(); + + //create a coreSchedule of size II*numIssueSlots + //each entry is NULL + while( coreSchedule.size() < II){ + std::vector<ModuloSchedGraphNode*>* newCycle=new std::vector<ModuloSchedGraphNode*>(); + for(unsigned k=0;k<numIssueSlots;k++) + newCycle->push_back(NULL); + coreSchedule.push_back(*newCycle); + } +} + + +//compute schedule and coreSchedule with the current II +bool ModuloScheduling::computeSchedule(){ + + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os <<"start to compute schedule \n"; + + //loop over the ordered nodes + for(NodeVec::const_iterator I=oNodes.begin();I!=oNodes.end();I++) + { + //try to schedule for node I + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + dumpScheduling(); + ModuloSchedGraphNode* node=*I; + + //compute whether this node has successor(s) + bool succ=true; + + //compute whether this node has predessor(s) + bool pred=true; + + NodeVec schSucc=graph.vectorConj(nodeScheduled,graph.succSet(node)); + if(schSucc.empty()) + succ=false; + NodeVec schPred=graph.vectorConj(nodeScheduled,graph.predSet(node)); + if(schPred.empty()) + pred=false; + + //startTime: the earliest time we will try to schedule this node + //endTime: the latest time we will try to schedule this node + int startTime, endTime; + + //node's earlyStart: possible earliest time to schedule this node + //node's lateStart: possible latest time to schedule this node + node->setEarlyStart(-1); + node->setLateStart(9999); + + + //this node has predessor but no successor + if(!succ && pred){ + + //this node's earlyStart is it's predessor's schedule time + the edge delay + // - the iteration difference* II + for(unsigned i=0;i<schPred.size();i++){ + ModuloSchedGraphNode* predNode=schPred[i]; + SchedGraphEdge* edge=graph.getMaxDelayEdge(predNode->getNodeId(),node->getNodeId()); + int temp=predNode->getSchTime()+edge->getMinDelay() - edge->getIteDiff()*II; + node->setEarlyStart( max(node->getEarlyStart(),temp)); + } + startTime=node->getEarlyStart(); + endTime=node->getEarlyStart()+II-1; + } + + + //this node has successor but no predessor + if(succ && !pred){ + for(unsigned i=0;i<schSucc.size();i++){ + ModuloSchedGraphNode* succNode=schSucc[i]; + SchedGraphEdge* edge=graph.getMaxDelayEdge(succNode->getNodeId(),node->getNodeId()); + int temp=succNode->getSchTime() - edge->getMinDelay() + edge->getIteDiff()*II; + node->setLateStart(min(node->getEarlyStart(),temp)); + } + startTime=node->getLateStart()- II+1; + endTime=node->getLateStart(); + } + + //this node has both successors and predessors + if(succ && pred) + { + for(unsigned i=0;i<schPred.size();i++){ + ModuloSchedGraphNode* predNode=schPred[i]; + SchedGraphEdge* edge=graph.getMaxDelayEdge(predNode->getNodeId(),node->getNodeId()); + int temp=predNode->getSchTime()+edge->getMinDelay() - edge->getIteDiff()*II; + node->setEarlyStart(max(node->getEarlyStart(),temp)); + } + for(unsigned i=0;i<schSucc.size();i++){ + ModuloSchedGraphNode* succNode=schSucc[i]; + SchedGraphEdge* edge=graph.getMaxDelayEdge(succNode->getNodeId(),node->getNodeId()); + int temp=succNode->getSchTime() - edge->getMinDelay() + edge->getIteDiff()*II; + node->setLateStart(min(node->getEarlyStart(),temp)); + } + startTime=node->getEarlyStart(); + endTime=min(node->getLateStart(),node->getEarlyStart()+((int)II)-1); + } + + //this node has no successor or predessor + if(!succ && !pred){ + node->setEarlyStart(node->getASAP()); + startTime=node->getEarlyStart(); + endTime=node->getEarlyStart()+II -1; + } + + //try to schedule this node based on the startTime and endTime + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"scheduling the node "<<(*I)->getNodeId()<<"\n"; + + bool success= this->ScheduleNode(node,startTime, endTime,nodeScheduled); + if(!success)return false; + } + return true; +} + + +//get the successor of the BasicBlock +BasicBlock* ModuloScheduling::getSuccBB(BasicBlock* bb){ + + BasicBlock* succ_bb; + for(unsigned i=0;i < II; i++) + for(unsigned j=0;j< coreSchedule[i].size();j++) + if(coreSchedule[i][j]){ + const Instruction* ist=coreSchedule[i][j]->getInst(); + + //we can get successor from the BranchInst instruction + //assume we only have one successor (besides itself) here + if(BranchInst::classof(ist)){ + BranchInst* bi=(BranchInst*)ist; + assert(bi->isConditional()&&"the branchInst is not a conditional one"); + assert(bi->getNumSuccessors() ==2&&" more than two successors?"); + BasicBlock* bb1=bi->getSuccessor(0); + BasicBlock* bb2=bi->getSuccessor(1); + assert( (bb1 == bb|| bb2 == bb) && " None of its successor is itself?"); + if(bb1 == bb) succ_bb=bb2; + else succ_bb=bb1; + return succ_bb; + } + } + assert( 0 && "NO Successor?"); + return NULL; +} + + +//get the predecessor of the BasicBlock +BasicBlock* ModuloScheduling::getPredBB(BasicBlock* bb){ + + BasicBlock* pred_bb; + + for(unsigned i=0;i < II; i++) + for(unsigned j=0;j< coreSchedule[i].size();j++) + if(coreSchedule[i][j]){ + const Instruction* ist=coreSchedule[i][j]->getInst(); + + //we can get predecessor from the PHINode instruction + //assume we only have one predecessor (besides itself) here + if(PHINode::classof(ist)){ + PHINode* phi=(PHINode*) ist; + assert(phi->getNumIncomingValues() == 2 &&" the number of incoming value is not equal to two? "); + BasicBlock* bb1= phi->getIncomingBlock(0); + BasicBlock* bb2= phi->getIncomingBlock(1); + assert( (bb1 == bb || bb2 == bb) && " None of its predecessor is itself?"); + if(bb1 == bb) pred_bb=bb2; + else pred_bb=bb1; + return pred_bb; + } + } + assert(0 && " no predecessor?"); + return NULL; +} + + +//construct the prologue +void ModuloScheduling::constructPrologue(BasicBlock* prologue){ + + InstListType& prologue_ist = prologue->getInstList(); + vvNodeType& tempSchedule_prologue= *(new vector< std::vector<ModuloSchedGraphNode*> >(schedule)); + + //compute the schedule for prologue + unsigned round=0; + unsigned scheduleSize=schedule.size(); + while(round < scheduleSize/II){ + round++; + for(unsigned i=0;i < scheduleSize ;i++){ + if(round*II + i >= scheduleSize) break; + for(unsigned j=0;j < schedule[i].size(); j++) + if(schedule[i][j]){ + assert( tempSchedule_prologue[round*II +i ][j] == NULL && "table not consitant with core table"); + + //move the schedule one iteration ahead and overlap with the original one + tempSchedule_prologue[round*II + i][j]=schedule[i][j]; + } + } + } + + //clear the clone memory in the core schedule instructions + clearCloneMemory(); + + //fill in the prologue + for(unsigned i=0;i < ceil(1.0*scheduleSize/II -1)*II ;i++) + for(unsigned j=0;j < tempSchedule_prologue[i].size();j++) + if(tempSchedule_prologue[i][j]){ + + //get the instruction + Instruction* orn=(Instruction*)tempSchedule_prologue[i][j]->getInst(); + + //made a clone of it + Instruction* cln=cloneInstSetMemory(orn); + + //insert the instruction + prologue_ist.insert(prologue_ist.back(),cln ); + + //if there is PHINode in the prologue, the incoming value from itself should be removed + //because it is not a loop any longer + if( PHINode::classof(cln)){ + PHINode* phi=(PHINode*)cln; + phi->removeIncomingValue(phi->getParent()); + } + } +} + + +//construct the kernel BasicBlock +void ModuloScheduling::constructKernel(BasicBlock* prologue,BasicBlock* kernel,BasicBlock* epilogue){ + + //*************fill instructions in the kernel**************** + InstListType& kernel_ist = kernel->getInstList(); + BranchInst* brchInst; + PHINode* phiInst, *phiCln; + + for(unsigned i=0;i<coreSchedule.size();i++) + for(unsigned j=0;j<coreSchedule[i].size();j++) + if(coreSchedule[i][j]){ + + //we should take care of branch instruction differently with normal instructions + if(BranchInst::classof(coreSchedule[i][j]->getInst())){ + brchInst=(BranchInst*)coreSchedule[i][j]->getInst(); + continue; + } + + //we should take care of PHINode instruction differently with normal instructions + if( PHINode::classof(coreSchedule[i][j]->getInst())){ + phiInst= (PHINode*)coreSchedule[i][j]->getInst(); + Instruction* cln=cloneInstSetMemory(phiInst); + kernel_ist.insert(kernel_ist.back(),cln); + phiCln=(PHINode*)cln; + continue; + } + + //for normal instructions: made a clone and insert it in the kernel_ist + Instruction* cln=cloneInstSetMemory( (Instruction*)coreSchedule[i][j]->getInst()); + kernel_ist.insert(kernel_ist.back(),cln); + } + + //the two incoming BasicBlock for PHINode is the prologue and the kernel (itself) + phiCln->setIncomingBlock(0,prologue); + phiCln->setIncomingBlock(1,kernel); + + //the incoming value for the kernel (itself) is the new value which is computed in the kernel + Instruction* originalVal=(Instruction*)phiInst->getIncomingValue(1); + phiCln->setIncomingValue(1, originalVal->getClone()); + + + //make a clone of the branch instruction and insert it in the end + BranchInst* cln=(BranchInst*)cloneInstSetMemory( brchInst); + kernel_ist.insert(kernel_ist.back(),cln); + + //delete the unconditional branch instruction, which is generated when splitting the basicBlock + kernel_ist.erase( --kernel_ist.end()); + + //set the first successor to itself + ((BranchInst*)cln)->setSuccessor(0, kernel); + //set the second successor to eiplogue + ((BranchInst*)cln)->setSuccessor(1,epilogue); + + //*****change the condition******* + + //get the condition instruction + Instruction* cond=(Instruction*)cln->getCondition(); + + //get the condition's second operand, it should be a constant + Value* operand=cond->getOperand(1); + assert(ConstantSInt::classof(operand)); + + //change the constant in the condtion instruction + ConstantSInt* iteTimes=ConstantSInt::get(operand->getType(),((ConstantSInt*)operand)->getValue()-II+1); + cond->setOperand(1,iteTimes); + +} + + + + + +//construct the epilogue +void ModuloScheduling::constructEpilogue(BasicBlock* epilogue, BasicBlock* succ_bb){ + + //compute the schedule for epilogue + vvNodeType& tempSchedule_epilogue= *(new vector< std::vector<ModuloSchedGraphNode*> >(schedule)); + unsigned scheduleSize=schedule.size(); + int round =0; + while(round < ceil(1.0*scheduleSize/II )-1 ){ + round++; + for( unsigned i=0;i < scheduleSize ; i++){ + if(i + round *II >= scheduleSize) break; + for(unsigned j=0;j < schedule[i].size();j++) + if(schedule[i + round*II ][j]){ + assert( tempSchedule_epilogue[i][j] == NULL && "table not consitant with core table"); + + //move the schdule one iteration behind and overlap + tempSchedule_epilogue[i][j]=schedule[i + round*II][j]; + } + } + } + + //fill in the epilogue + InstListType& epilogue_ist = epilogue->getInstList(); + for(unsigned i=II;i <scheduleSize ;i++) + for(unsigned j=0;j < tempSchedule_epilogue[i].size();j++) + if(tempSchedule_epilogue[i][j]){ + Instruction* inst=(Instruction*)tempSchedule_epilogue[i][j]->getInst(); + + //BranchInst and PHINode should be treated differently + //BranchInst:unecessary, simly omitted + //PHINode: omitted + if( !BranchInst::classof(inst) && ! PHINode::classof(inst) ){ + //make a clone instruction and insert it into the epilogue + Instruction* cln=cloneInstSetMemory(inst); + epilogue_ist.push_front(cln); + } + } + + + //*************delete the original instructions****************// + //to delete the original instructions, we have to make sure their use is zero + + //update original core instruction's uses, using its clone instread + for(unsigned i=0;i < II; i++) + for(unsigned j=0;j < coreSchedule[i].size() ;j++){ + if(coreSchedule[i][j]) + updateUseWithClone((Instruction*)coreSchedule[i][j]->getInst() ); + } + + //erase these instructions + for(unsigned i=0;i < II; i++) + for(unsigned j=0;j < coreSchedule[i].size();j++) + if(coreSchedule[i][j]){ + Instruction* ist=(Instruction*)coreSchedule[i][j]->getInst(); + ist->getParent()->getInstList().erase(ist); + } + //**************************************************************// + + + //finally, insert an unconditional branch instruction at the end + epilogue_ist.push_back(new BranchInst(succ_bb)); + +} + + +//---------------------------------------------------------------------------------------------- +//this function replace the value(instruction) ist in other instructions with its latest clone +//i.e. after this function is called, the ist is not used anywhere and it can be erased. +//---------------------------------------------------------------------------------------------- +void ModuloScheduling::updateUseWithClone(Instruction* ist){ + + while(ist->use_size() >0){ + bool destroyed=false; + + //other instruction is using this value ist + assert(Instruction::classof(*ist->use_begin())); + Instruction *inst=(Instruction*)(* ist->use_begin()); + + for(unsigned i=0;i<inst->getNumOperands();i++) + if(inst->getOperand(i) == ist && ist->getClone()){ + + //if the instruction is TmpInstruction, simly delete it because it has no parent + // and it does not belongs to any BasicBlock + if(TmpInstruction::classof(inst)) { + delete inst; + destroyed=true; + break; + } + + + //otherwise, set the instruction's operand to the value's clone + inst->setOperand(i, ist->getClone()); + + //the use from the original value ist is destroyed + destroyed=true; + break; + } + if( !destroyed) + { + //if the use can not be destroyed , something is wrong + inst->dump(); + assert( 0 &&"this use can not be destroyed"); + } + } + +} + + +//******************************************************** +//this function clear all clone mememoy +//i.e. set all instruction's clone memory to NULL +//***************************************************** +void ModuloScheduling::clearCloneMemory(){ +for(unsigned i=0; i < coreSchedule.size();i++) + for(unsigned j=0;j<coreSchedule[i].size();j++) + if(coreSchedule[i][j]) ((Instruction*)coreSchedule[i][j]->getInst())->clearClone(); + +} + + +//******************************************************************************** +//this function make a clone of the instruction orn +//the cloned instruction will use the orn's operands' latest clone as its operands +//it is done this way because LLVM is in SSA form and we should use the correct value +// +//this fuction also update the instruction orn's latest clone memory +//********************************************************************************** +Instruction* ModuloScheduling::cloneInstSetMemory(Instruction* orn) { + +//make a clone instruction + Instruction* cln=orn->clone(); + + + //update the operands + for(unsigned k=0;k<orn->getNumOperands();k++){ + const Value* op=orn->getOperand(k); + if(Instruction::classof(op) && ((Instruction*)op)->getClone()){ + Instruction* op_inst=(Instruction*)op; + cln->setOperand(k, op_inst->getClone()); + } + } + + //update clone memory + orn->setClone(cln); + return cln; +} + + + +bool ModuloScheduling::ScheduleNode(ModuloSchedGraphNode* node,unsigned start, unsigned end, NodeVec& nodeScheduled) +{ + + const MachineSchedInfo& msi=target.getSchedInfo(); + unsigned int numIssueSlots=msi.maxNumIssueTotal; + + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<<"startTime= "<<start<<" endTime= "<<end<<"\n"; + bool isScheduled=false; + for(unsigned i=start;i<= end;i++){ + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os<< " now try cycle " <<i<<":"<<"\n"; + for(unsigned j=0;j<numIssueSlots;j++){ + unsigned int core_i = i%II; + unsigned int core_j=j; + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os <<"\t Trying slot "<<j<<"..........."; + //check the resouce table, make sure there is no resource conflicts + const Instruction* instr=node->getInst(); + MachineCodeForInstruction& tempMvec= MachineCodeForInstruction::get(instr); + bool resourceConflict=false; + const MachineInstrInfo &mii=msi.getInstrInfo(); + + if(coreSchedule.size() < core_i+1 || !coreSchedule[core_i][core_j]){ + //this->dumpResourceUsageTable(); + int latency=0; + for(unsigned k=0;k< tempMvec.size();k++) + { + MachineInstr* minstr=tempMvec[k]; + InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode()); + std::vector<std::vector<resourceId_t> > resources + =rUsage.resourcesByCycle; + updateResourceTable(resources,i + latency); + latency +=max(mii.minLatency(minstr->getOpCode()),1) ; + } + + //this->dumpResourceUsageTable(); + + latency=0; + if( resourceTableNegative()){ + + //undo-update the resource table + for(unsigned k=0;k< tempMvec.size();k++){ + MachineInstr* minstr=tempMvec[k]; + InstrRUsage rUsage=msi.getInstrRUsage(minstr->getOpCode()); + std::vector<std::vector<resourceId_t> > resources + =rUsage.resourcesByCycle; + undoUpdateResourceTable(resources,i + latency); + latency +=max(mii.minLatency(minstr->getOpCode()),1) ; + } + resourceConflict=true; + } + } + if( !resourceConflict && !coreSchedule[core_i][core_j]){ + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess){ + modSched_os <<" OK!"<<"\n"; + modSched_os<<"Node "<<node->getNodeId()<< " is scheduleed."<<"\n"; + } + //schedule[i][j]=node; + while(schedule.size() <= i){ + std::vector<ModuloSchedGraphNode*>* newCycle=new std::vector<ModuloSchedGraphNode*>(); + for(unsigned k=0;k<numIssueSlots;k++) + newCycle->push_back(NULL); + schedule.push_back(*newCycle); + } + vector<ModuloSchedGraphNode*>::iterator startIterator; + startIterator = schedule[i].begin(); + schedule[i].insert(startIterator+j,node); + startIterator = schedule[i].begin(); + schedule[i].erase(startIterator+j+1); + + //update coreSchedule + //coreSchedule[core_i][core_j]=node; + while(coreSchedule.size() <= core_i){ + std::vector<ModuloSchedGraphNode*>* newCycle=new std::vector<ModuloSchedGraphNode*>(); + for(unsigned k=0;k<numIssueSlots;k++) + newCycle->push_back(NULL); + coreSchedule.push_back(*newCycle); + } + + startIterator = coreSchedule[core_i].begin(); + coreSchedule[core_i].insert(startIterator+core_j,node); + startIterator = coreSchedule[core_i].begin(); + coreSchedule[core_i].erase(startIterator+core_j+1); + + node->setSchTime(i); + isScheduled=true; + nodeScheduled.push_back(node); + + break; + } + else if( coreSchedule[core_i][core_j]) { + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os <<" Slot not available "<<"\n"; + } + else{ + if( ModuloSchedDebugLevel >= ModuloSched_PrintScheduleProcess) + modSched_os <<" Resource conflicts"<<"\n"; + } + } + if(isScheduled) break; + } + //assert(nodeScheduled &&"this node can not be scheduled?"); + return isScheduled; +} + +void ModuloScheduling::updateResourceTable(std::vector<std::vector<unsigned int> > useResources, int startCycle){ + for(unsigned i=0;i< useResources.size();i++){ + int absCycle=startCycle+i; + int coreCycle=absCycle % II; + std::vector<pair<int,int> >& resourceRemained=resourceTable[coreCycle]; + std::vector<unsigned int>& resourceUsed= useResources[i]; + for(unsigned j=0;j< resourceUsed.size();j++){ + for(unsigned k=0;k< resourceRemained.size();k++) + if((int)resourceUsed[j] == resourceRemained[k].first){ + resourceRemained[k].second--; + } + } + } +} + +void ModuloScheduling::undoUpdateResourceTable(std::vector<std::vector<unsigned int> > useResources, int startCycle){ + for(unsigned i=0;i< useResources.size();i++){ + int absCycle=startCycle+i; + int coreCycle=absCycle % II; + std::vector<pair<int,int> >& resourceRemained=resourceTable[coreCycle]; + std::vector<unsigned int>& resourceUsed= useResources[i]; + for(unsigned j=0;j< resourceUsed.size();j++){ + for(unsigned k=0;k< resourceRemained.size();k++) + if((int)resourceUsed[j] == resourceRemained[k].first){ + resourceRemained[k].second++; + } + } + } +} + + +//----------------------------------------------------------------------- +//Function: resouceTableNegative +//return value: +// return false if any element in the resouceTable is negative +// otherwise return true +//Purpose: +// this function is used to determine if an instruction is eligible for schedule at certain cycle +//--------------------------------------------------------------------------------------- + +bool ModuloScheduling::resourceTableNegative(){ + assert(resourceTable.size() == (unsigned)II&& "resouceTable size must be equal to II"); + bool isNegative=false; + for(unsigned i=0; i < resourceTable.size();i++) + for(unsigned j=0;j < resourceTable[i].size();j++){ + if(resourceTable[i][j].second <0) { + isNegative=true; + break; + } + } + return isNegative; +} + + +//---------------------------------------------------------------------- +//Function: dumpResouceUsageTable +//Purpose: +// print out ResouceTable for debug +// +//------------------------------------------------------------------------ + +void ModuloScheduling::dumpResourceUsageTable(){ + modSched_os<<"dumping resource usage table"<<"\n"; + for(unsigned i=0;i< resourceTable.size();i++){ + for(unsigned j=0;j < resourceTable[i].size();j++) + modSched_os <<resourceTable[i][j].first<<":"<< resourceTable[i][j].second<<" "; + modSched_os <<"\n"; + } + +} + +//---------------------------------------------------------------------- +//Function: dumpSchedule +//Purpose: +// print out thisSchedule for debug +// +//----------------------------------------------------------------------- +void ModuloScheduling::dumpSchedule(std::vector< std::vector<ModuloSchedGraphNode*> > thisSchedule){ + + const MachineSchedInfo& msi=target.getSchedInfo(); + unsigned numIssueSlots=msi.maxNumIssueTotal; + for(unsigned i=0;i< numIssueSlots;i++) + modSched_os <<"\t#"; + modSched_os<<"\n"; + for(unsigned i=0;i < thisSchedule.size();i++) + { + modSched_os<<"cycle"<<i<<": "; + for(unsigned j=0;j<thisSchedule[i].size();j++) + if(thisSchedule[i][j]!= NULL) + modSched_os<<thisSchedule[i][j]->getNodeId()<<"\t"; + else + modSched_os<<"\t"; + modSched_os<<"\n"; + } + +} + + +//---------------------------------------------------- +//Function: dumpScheduling +//Purpose: +// print out the schedule and coreSchedule for debug +// +//------------------------------------------------------- + +void ModuloScheduling::dumpScheduling(){ + modSched_os<<"dump schedule:"<<"\n"; + const MachineSchedInfo& msi=target.getSchedInfo(); + unsigned numIssueSlots=msi.maxNumIssueTotal; + for(unsigned i=0;i< numIssueSlots;i++) + modSched_os <<"\t#"; + modSched_os<<"\n"; + for(unsigned i=0;i < schedule.size();i++) + { + modSched_os<<"cycle"<<i<<": "; + for(unsigned j=0;j<schedule[i].size();j++) + if(schedule[i][j]!= NULL) + modSched_os<<schedule[i][j]->getNodeId()<<"\t"; + else + modSched_os<<"\t"; + modSched_os<<"\n"; + } + + modSched_os<<"dump coreSchedule:"<<"\n"; + for(unsigned i=0;i< numIssueSlots;i++) + modSched_os <<"\t#"; + modSched_os<<"\n"; + for(unsigned i=0;i < coreSchedule.size();i++){ + modSched_os<<"cycle"<<i<<": "; + for(unsigned j=0;j< coreSchedule[i].size();j++) + if(coreSchedule[i][j] !=NULL) + modSched_os<<coreSchedule[i][j]->getNodeId()<<"\t"; + else + modSched_os<<"\t"; + modSched_os<<"\n"; + } +} + + + +//--------------------------------------------------------------------------- +// Function: ModuloSchedulingPass +// +// Purpose: +// Entry point for Modulo Scheduling +// Schedules LLVM instruction +// +//--------------------------------------------------------------------------- + +namespace { + class ModuloSchedulingPass : public FunctionPass { + const TargetMachine ⌖ + public: + ModuloSchedulingPass(const TargetMachine &T) : target(T) {} + const char *getPassName() const { return "Modulo Scheduling"; } + + // getAnalysisUsage - We use LiveVarInfo... + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + //AU.addRequired(FunctionLiveVarInfo::ID); + } + bool runOnFunction(Function &F); + }; +} // end anonymous namespace + + + +bool ModuloSchedulingPass::runOnFunction(Function &F) +{ + + //if necessary , open the output for debug purpose + if(ModuloSchedDebugLevel== ModuloSched_Disable) + return false; + + if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule){ + modSched_fb.open("moduloSchedDebugInfo.output", ios::out); + modSched_os<<"******************Modula Scheduling debug information*************************"<<endl; + } + + ModuloSchedGraphSet* graphSet = new ModuloSchedGraphSet(&F,target); + ModuloSchedulingSet ModuloSchedulingSet(*graphSet); + + if(ModuloSchedDebugLevel>= ModuloSched_PrintSchedule) + modSched_fb.close(); + + return false; +} + + +Pass *createModuloSchedulingPass(const TargetMachine &tgt) { + return new ModuloSchedulingPass(tgt); +} + diff --git a/lib/CodeGen/ModuloScheduling/ModuloScheduling.h b/lib/CodeGen/ModuloScheduling/ModuloScheduling.h new file mode 100644 index 0000000..9f9fcaa --- /dev/null +++ b/lib/CodeGen/ModuloScheduling/ModuloScheduling.h @@ -0,0 +1,160 @@ +//// - head file for the classes ModuloScheduling and ModuloScheduling ----*- C++ -*-===// +// +// This header defines the the classes ModuloScheduling and ModuloSchedulingSet 's structure +// +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_MODULOSCHEDULING_H +#define LLVM_CODEGEN_MODULOSCHEDULING_H + +#include "ModuloSchedGraph.h" +#include <iostream> + +class ModuloScheduling:NonCopyable { + private: + typedef std::vector<ModuloSchedGraphNode*> NodeVec; + + /// the graph to feed in + ModuloSchedGraph& graph; + const TargetMachine& target; + + //the BasicBlock to be scheduled + BasicBlock* bb; + + ///Iteration Intervel + ///FIXME: II may be a better name for its meaning + unsigned II; + + //the vector containing the nodes which have been scheduled + NodeVec nodeScheduled; + + ///the remaining unscheduled nodes + const NodeVec& oNodes; + + ///the machine resource table + std::vector< std::vector<pair<int,int> > > resourceTable ; + + ///the schedule( with many schedule stage) + std::vector<std::vector<ModuloSchedGraphNode*> > schedule; + + ///the kernel(core) schedule(length = II) + std::vector<std::vector<ModuloSchedGraphNode*> > coreSchedule; + + typedef BasicBlock::InstListType InstListType; + typedef std::vector <std::vector<ModuloSchedGraphNode*> > vvNodeType; + + + +public: + + ///constructor + ModuloScheduling(ModuloSchedGraph& _graph): + graph(_graph), + target(graph.getTarget()), + oNodes(graph.getONodes()) + { + II = graph.getMII(); + bb=(BasicBlock*)graph.getBasicBlocks()[0]; + + instrScheduling(); + }; + + ///destructor + ~ModuloScheduling(){}; + + ///the method to compute schedule and instert epilogue and prologue + void instrScheduling(); + + ///debug functions: + ///dump the schedule and core schedule + void dumpScheduling(); + + ///dump the input vector of nodes + //sch: the input vector of nodes + void dumpSchedule( std::vector<std::vector<ModuloSchedGraphNode*> > sch); + + ///dump the resource usage table + void dumpResourceUsageTable(); + + + //*******************internel functions******************************* +private: + //clear memory from the last round and initialize if necessary + void clearInitMem(const MachineSchedInfo& ); + + //compute schedule and coreSchedule with the current II + bool computeSchedule(); + + BasicBlock* getSuccBB(BasicBlock*); + BasicBlock* getPredBB(BasicBlock*); + void constructPrologue(BasicBlock* prologue); + void constructKernel(BasicBlock* prologue,BasicBlock* kernel,BasicBlock* epilogue); + void constructEpilogue(BasicBlock* epilogue,BasicBlock* succ_bb); + + ///update the resource table at the startCycle + //vec: the resouce usage + //startCycle: the start cycle the resouce usage is + void updateResourceTable(std::vector<vector<unsigned int> > vec,int startCycle); + + ///un-do the update in the resource table in the startCycle + //vec: the resouce usage + //startCycle: the start cycle the resouce usage is + void undoUpdateResourceTable(std::vector<vector<unsigned int> > vec,int startCycle); + + ///return whether the resourcetable has negative element + ///this function is called after updateResouceTable() to determine whether a node can + /// be scheduled at certain cycle + bool resourceTableNegative(); + + + ///try to Schedule the node starting from start to end cycle(inclusive) + //if it can be scheduled, put it in the schedule and update nodeScheduled + //node: the node to be scheduled + //start: start cycle + //end : end cycle + //nodeScheduled: a vector storing nodes which has been scheduled + bool ScheduleNode(ModuloSchedGraphNode* node,unsigned start, unsigned end, NodeVec& nodeScheduled); + + //each instruction has a memory of the latest clone instruction + //the clone instruction can be get using getClone() + //this function clears the memory, i.e. getClone() after calling this function returns null + void clearCloneMemory(); + + //this fuction make a clone of this input Instruction and update the clone memory + //inst: the instrution to be cloned + Instruction* cloneInstSetMemory(Instruction* inst); + + //this function update each instrutions which uses ist as its operand + //after update, each instruction will use ist's clone as its operand + void updateUseWithClone(Instruction* ist); + +}; + + +class ModuloSchedulingSet:NonCopyable{ + private: + + //the graphSet to feed in + ModuloSchedGraphSet& graphSet; + public: + + //constructor + //Scheduling graph one by one + ModuloSchedulingSet(ModuloSchedGraphSet _graphSet):graphSet(_graphSet){ + for(unsigned i=0;i<graphSet.size();i++){ + ModuloSchedGraph& graph=*(graphSet[i]); + if(graph.isLoop())ModuloScheduling ModuloScheduling(graph); + } + }; + + //destructor + ~ModuloSchedulingSet(){}; +}; + + + +#endif + + |