Diffstat (limited to 'lib/CodeGen')
42 files changed, 3210 insertions, 1136 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b48b5af..7364f42 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -220,16 +220,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkerPrivateWeakDefAutoLinkage) + GlobalValue::LinkOnceODRAutoHideLinkage) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d231665..d30e5bb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf compile unit. +// This file contains support for constructing a dwarf compile unit. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d240389..2e189ad 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -61,6 +61,7 @@ add_llvm_library(LLVMCodeGen MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp + MachineTraceMetrics.cpp MachineVerifier.cpp OcamlGC.cpp OptimizePHIs.cpp diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 9840a40..f9347ef 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -17,12 +17,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" #include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +32,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -48,7 +51,10 @@ BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11")); -typedef SmallSetVector<MachineBasicBlock*, 8> BlockSetVector; +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); //===----------------------------------------------------------------------===// // SSAIfConv @@ -94,6 +100,12 @@ public: /// equal to Tail. 
bool isTriangle() const { return TBB == Tail || FBB == Tail; } + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + /// Information about each phi in the Tail block. struct PHIInfo { MachineInstr *PHI; @@ -132,6 +144,12 @@ private: /// Find a valid insertion point in Head. bool findInsertionPoint(); + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + public: /// runOnMachineFunction - Initialize per-function data structures. void runOnMachineFunction(MachineFunction &MF) { @@ -335,11 +353,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) return false; - // We could support additional Tail predecessors by updating phis instead of - // eliminating them. Let's see an example where it matters first. Tail = Succ0->succ_begin()[0]; - if (Tail->pred_size() != 2) - return false; // This is not a triangle. if (Tail != Succ1) { @@ -389,8 +403,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Any phis in the tail block must be convertible to selects. PHIs.clear(); - MachineBasicBlock *TPred = TBB == Tail ? Head : TBB; - MachineBasicBlock *FPred = FBB == Tail ? Head : FBB; + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); I != E && I->isPHI(); ++I) { PHIs.push_back(&*I); @@ -426,24 +440,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (!findInsertionPoint()) return false; + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; return true; } - -/// convertIf - Execute the if conversion after canConvertIf has determined the -/// feasibility. -/// -/// Any basic blocks erased will be added to RemovedBlocks. -/// -void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { - assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); - - // Move all instructions into Head, except for the terminators. - if (TBB != Tail) - Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - if (FBB != Tail) - Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); assert(FirstTerm != Head->end() && "No terminators"); DebugLoc HeadDL = FirstTerm->getDebugLoc(); @@ -459,6 +467,66 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { PI.PHI->eraseFromParent(); PI.PHI = 0; } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. 
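The shape of that rewrite is easiest to see on a small example. The virtual registers and block names below are invented for illustration and are not taken from the patch:

    Before:  Tail:  %r = PHI %a, <TPred>, %b, <FPred>, %c, <OtherPred>
    After:   Head:  %s = select Cond, %a, %b     (emitted via TII->insertSelect)
             Tail:  %r = PHI %s, <Head>, %c, <OtherPred>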
+void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); // Fix up the CFG, temporarily leave Head without any successors. Head->removeSuccessor(TBB); @@ -470,6 +538,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // Fix up Head's terminators. // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); TII->RemoveBranch(*Head); // Erase the now empty conditional blocks. It is likely that Head can fall @@ -484,7 +553,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { } assert(Head->succ_empty() && "Additional head successors?"); - if (Head->isLayoutSuccessor(Tail)) { + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { // Splice Tail onto the end of Head. 
DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() << " into head BB#" << Head->getNumber() << '\n'); @@ -512,9 +581,12 @@ namespace { class EarlyIfConverter : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; SSAIfConv IfConv; public: @@ -527,6 +599,8 @@ private: bool tryConvertIf(MachineBasicBlock*); void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); }; } // end anonymous namespace @@ -537,6 +611,7 @@ INITIALIZE_PASS_BEGIN(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) @@ -546,6 +621,8 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -576,12 +653,117 @@ void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { Loops->removeBlock(Removed[i]); } +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. 
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + /// Attempt repeated if-conversion on MBB, return true if successful. /// bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; - while (IfConv.canConvertIf(MBB)) { + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. 
+ invalidateTraces(); SmallVector<MachineBasicBlock*, 4> RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; @@ -597,9 +779,12 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { << ((Value*)MF.getFunction())->getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index b14afc2..7a17331 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); + DEBUG(dbgs() << "subreg: " << *CopyMI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 01077db..0a795e6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { valnos.pop_back(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->setIsUnused(true); + ValNo->markUnused(); } } @@ -667,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } - // Merge the relevant flags. - V2->mergeFlags(V1); - // Now that V1 is dead, remove it. markValNoForDeletion(V1); @@ -737,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const { } else { OS << vni->def; if (vni->isPHIDef()) - OS << "-phidef"; - if (vni->hasPHIKill()) - OS << "-phikill"; + OS << "-phi"; } } } @@ -827,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - if (MO.isUse() && MO.isUndef()) - continue; // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = Idx.getRegSlot(MO.isUse()); - const VNInfo *VNI = LI.getVNInfoAt(Idx); - // FIXME: We should be able to assert(VNI) here, but the coalescer leaves - // dangling defs around. + LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. 
if (!VNI) continue; MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 819707f..d0f8ae1 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -38,7 +39,13 @@ #include <cmath> using namespace llvm; +// Switch to the new experimental algorithm for computing live intervals. +static cl::opt<bool> +NewLiveIntervals("new-live-intervals", cl::Hidden, + cl::desc("Use new algorithm for computing live intervals")); + char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { AllocatableRegs = TRI->getAllocatableSet(fn); ReservedRegs = TRI->getReservedRegs(fn); - computeIntervals(); + // Allocate space for all virtual registers. + VirtRegIntervals.resize(MRI->getNumVirtRegs()); + + if (NewLiveIntervals) { + // This is the new way of computing live intervals. + // It is independent of LiveVariables, and it can run at any time. + computeVirtRegs(); + computeRegMasks(); + } else { + // This is the old way of computing live intervals. + // It depends on LiveVariables. + computeIntervals(); + } computeLiveInRegUnits(); DEBUG(dump()); @@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // new valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the @@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); ValNo = interval.getNextValue(Start, VNInfoAllocator); - ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); @@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { llvm_unreachable("Multiply defined register"); @@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { } +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI->empty() && "Should only compute empty intervals."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->createDeadDefs(LI); + LRCalc->extendToUses(LI); +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = createInterval(Reg); + VirtRegIntervals[Reg] = LI; + computeVirtRegInterval(LI); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.first = RegMaskSlots.size(); + for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); + MI != ME; ++MI) + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); + RegMaskBits.push_back(MO->getRegMask()); + } + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first; + } +} + //===----------------------------------------------------------------------===// // Register Unit Liveness //===----------------------------------------------------------------------===// @@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - VNI->setIsUnused(true); + VNI->markUnused(); NewLI.removeRange(*LII); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; @@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { return MBB1 == MBB2 ? MBB1 : NULL; } +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *PHI = *I; + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 9384075..d828f25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { .getRegSlot(I.getOperand().isEarlyClobber()); // Create the def in LI. This may find an existing def.
- VNInfo *VNI = LI->createDeadDef(Idx, *Alloc); - VNI->setIsPHIDef(MI->isPHI()); + LI->createDeadDef(Idx, *Alloc); } } @@ -320,7 +319,6 @@ void LiveRangeCalc::updateSSA() { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); - VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 896fdbf..b4ce9aa 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -239,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Collect virtual registers to be erased after MI is gone. SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), @@ -246,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (!MOI->isReg()) continue; unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + ReadsPhysRegs = true; continue; + } LiveInterval &LI = LIS.getInterval(Reg); // Shrink read registers, unless it is likely to be expensive and @@ -271,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } - if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } // Erase any virtregs that are now empty and unused. There may be <undef> // uses around. Keep the empty live range in that case. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ecc1e95..cf13dbd 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. 
- N->RemoveRegOperandsFromUseLists(); + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(0); @@ -310,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { if (!succ_empty()) { if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } OS << '\n'; } } @@ -477,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) { void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New) { - uint32_t weight = 0; - succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + if (Old == New) + return; - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(SI); - weight = *WI; + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } } + assert(OldI != E && "Old is not a successor of this block"); + Old->removePredecessor(this); - // Update the successor information. - removeSuccessor(SI); - addSuccessor(New, weight); + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its weight instead of adding a duplicate edge. + if (!Weights.empty()) { + weight_iterator OldWI = getWeightIterator(OldI); + *getWeightIterator(NewI) += *OldWI; + Weights.erase(OldWI); + } + Successors.erase(OldI); } void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { @@ -507,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t weight = 0; - + uint32_t Weight = 0; // If Weight list is empty it means we don't use it (disabled optimization). if (!fromMBB->Weights.empty()) - weight = *fromMBB->Weights.begin(); + Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, weight); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); } } @@ -526,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t Weight = 0; + if (!fromMBB->Weights.empty()) + Weight = *fromMBB->Weights.begin(); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
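The rewritten replaceSuccessor() above no longer assumes New is absent from the successor list: when both edges exist, they are folded into one and their weights summed. A minimal standalone model of that merge, using plain std containers instead of the MachineBasicBlock API:

    #include <cassert>
    #include <vector>

    // Successors and their weights live in parallel vectors, as in
    // MachineBasicBlock. Weights may be empty when the optimization is off.
    static void replaceSucc(std::vector<int> &Succs,
                            std::vector<unsigned> &Weights, int Old, int New) {
      if (Old == New)
        return;
      size_t OldI = Succs.size(), NewI = Succs.size();
      for (size_t I = 0; I != Succs.size(); ++I) {
        if (Succs[I] == Old) OldI = I;
        if (Succs[I] == New) NewI = I;
      }
      assert(OldI != Succs.size() && "Old is not a successor");
      if (NewI == Succs.size()) {   // New absent: let it take Old's slot.
        Succs[OldI] = New;
        return;
      }
      if (!Weights.empty()) {       // Both present: merge the edge weights.
        Weights[NewI] += Weights[OldI];
        Weights.erase(Weights.begin() + OldI);
      }
      Succs.erase(Succs.begin() + OldI);
    }

    int main() {
      std::vector<int> S;      S.push_back(1);  S.push_back(2);
      std::vector<unsigned> W; W.push_back(30); W.push_back(70);
      replaceSucc(S, W, 1, 2); // fold the edge to BB#1 into the edge to BB#2
      assert(S.size() == 1 && S[0] == 2 && W.size() == 1 && W[0] == 100);
      return 0;
    }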
@@ -540,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } } +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); - return I != Successors.end(); + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -909,12 +942,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// getSuccWeight - Return weight of the edge from this block to MBB. /// -uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { +uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { if (Weights.empty()) return 0; - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - return *getWeightIterator(I); + return *getWeightIterator(Succ); } /// getWeightIterator - Return wight iterator corresonding to the I successor diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 5a15f92..c4dca2c 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + } PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. 
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important.
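A worked pass through the three coldness tests in this hunk, with invented frequencies (only the ratios matter):

    EntryFreq = 1000, ColdProb = 1/5  ->  WeightedEntryFreq = 200
    Freq(BB) = 900                    ->  900 >= 200, not cold vs. the entry
    LoopHeaderFreq = 1000             ->  900 >= 1000 * 1/5, not cold vs. header
    LayoutEdgeFreq = 150              ->  150 <= Freq * ColdProb = 180,
                                          so the layout edge is cold: align BB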
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0cc1af0..4479211 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight; } @@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Sum = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight / Scale; } assert(Sum <= UINT32_MAX); return Sum; } -uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; return Weight; } +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { MachineBasicBlock *MaxSucc = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); if (Weight > MaxWeight) { MaxWeight = Weight; MaxSucc = *I; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 9cfe9ab..896461f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -215,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - PhysRefs.insert(*AI); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); } @@ -324,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. 
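The MayIncreasePressure computation that follows is a subset test: replacing Reg with CSReg cannot extend CSReg's live range if every instruction that uses Reg already uses CSReg. A standalone sketch of the same test, with std::set standing in for the use_nodbg lists:

    #include <algorithm>
    #include <set>

    // True when some user of Reg is not already a user of CSReg, i.e. when
    // CSE could lengthen CSReg's live range and raise register pressure.
    static bool mayIncreasePressure(const std::set<const void*> &CSRegUsers,
                                    const std::set<const void*> &RegUsers) {
      return !std::includes(CSRegUsers.begin(), CSRegUsers.end(),
                            RegUsers.begin(), RegUsers.end());
    }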
+ bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet<MachineInstr*, 8> CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -394,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -463,15 +489,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && @@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -526,6 +566,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 8dada05..b166849 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -47,55 +47,6 @@ using namespace llvm; // MachineOperand Implementation //===----------------------------------------------------------------------===// -/// AddRegOperandToRegInfo - Add this register operand to the specified -/// MachineRegisterInfo. If it is null, then the next/prev fields should be -/// explicitly nulled out. -void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { - assert(isReg() && "Can only add reg operand to use lists"); - - // If the reginfo pointer is null, just explicitly null out or next/prev - // pointers, to ensure they are not garbage. 
- if (RegInfo == 0) { - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; - return; - } - - // Otherwise, add this operand to the head of the registers use/def list. - MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - - // For SSA values, we prefer to keep the definition at the start of the list. - // we do this by skipping over the definition if it is at the head of the - // list. - if (*Head && (*Head)->isDef()) - Head = &(*Head)->Contents.Reg.Next; - - Contents.Reg.Next = *Head; - if (Contents.Reg.Next) { - assert(getReg() == Contents.Reg.Next->getReg() && - "Different regs on the same list!"); - Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; - } - - Contents.Reg.Prev = Head; - *Head = this; -} - -/// RemoveRegOperandFromRegInfo - Remove this register operand from the -/// MachineRegisterInfo it is linked with. -void MachineOperand::RemoveRegOperandFromRegInfo() { - assert(isOnRegUseList() && "Reg operand is not on a use list"); - // Unlink this from the doubly linked list of operands. - MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; - if (NextOp) { - assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); - NextOp->Contents.Reg.Prev = Contents.Reg.Prev; - } - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; -} - void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. @@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { - RemoveRegOperandFromRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); SmallContents.RegNo = Reg; - AddRegOperandToRegInfo(&MF->getRegInfo()); + MRI.addRegOperandToUseList(this); return; } @@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { setReg(Reg); } +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. 
- if (isReg() && getParent() && getParent()->getParent() && - getParent()->getParent()->getParent()) - RemoveRegOperandFromRegInfo(); + if (isReg() && isOnRegUseList()) + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; @@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + MachineRegisterInfo *RegInfo = 0; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the // register's use/def lists. - if (isReg()) { - assert(!isEarlyClobber()); - setReg(Reg); - } else { - // Otherwise, change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - AddRegOperandToRegInfo(&MF->getRegInfo()); - } + if (RegInfo && isReg()) + RegInfo->removeRegOperandFromUseList(this); + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -182,7 +151,13 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; - SubReg = 0; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); } /// isIdenticalTo - Return true if this operand is identical to the specified @@ -208,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_FrameIndex: return getIndex() == Other.getIndex(); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); case MachineOperand::MO_JumpTableIndex: return getIndex() == Other.getIndex(); @@ -245,6 +221,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { case MachineOperand::MO_FrameIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), MO.getOffset()); case MachineOperand::MO_JumpTableIndex: @@ -353,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; + case MachineOperand::MO_TargetIndex: + OS << "<ti#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; case MachineOperand::MO_JumpTableIndex: OS << "<jt#" << getIndex() << '>'; break; @@ -650,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. 
This requires that the /// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists() { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); - } + MRI.removeRegOperandFromUseList(&Operands[i]); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. -void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(&RegInfo); - } + MRI.addRegOperandToUseList(&Operands[i]); } - /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list @@ -695,7 +674,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; if (RegInfo) - Operands[OpNo].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } @@ -712,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); // Insert the new operand at OpNo. Operands.insert(Operands.begin() + OpNo, Op); @@ -723,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); // When adding a register operand, tell RegInfo about it. if (Operands[OpNo].isReg()) { - // Add the new operand to RegInfo, even when RegInfo is NULL. - // This will initialize the linked list pointers. - Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // Ensure isOnRegUseList() returns false, regardless of Op's status. + Operands[OpNo].Contents.Reg.Prev = 0; + // Add the new operand to RegInfo. + if (RegInfo) + RegInfo->addRegOperandToUseList(&Operands[OpNo]); // If the register operand is flagged as early, mark the operand as such. if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); @@ -739,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (RegInfo) { for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -749,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. 
- if (Operands.back().isReg() && Operands.back().isOnRegUseList()) - Operands.back().RemoveRegOperandFromRegInfo(); + if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) + RegInfo->removeRegOperandFromUseList(&Operands.back()); Operands.pop_back(); return; @@ -763,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { // Otherwise, we are removing an interior operand. If we have reginfo to // update, remove all operands that will be shifted down from their reg lists, // move everything down, then re-add them. - MachineRegisterInfo *RegInfo = getRegInfo(); if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); } } @@ -776,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 82e1235..5fb938f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // New virtual register number. unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - - // Add a reg, but keep track of whether the vector reallocated or not. - const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); - void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); - - if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) - // The vector reallocated, handle this now. - HandleVRegListReallocation(); return Reg; } @@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() { VRegInfo.clear(); } -/// HandleVRegListReallocation - We just added a virtual register to the -/// VRegInfo info list and it reallocated. Update the use/def lists info -/// pointers. -void MachineRegisterInfo::HandleVRegListReallocation() { - // The back pointers for the vreg lists point into the previous vector. - // Update them to point to their correct slots. - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - MachineOperand *List = VRegInfo[Reg].second; - if (!List) continue; - // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[Reg].second; +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. 
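The invariants stated above (NULL-terminated Next chain, circular Prev chain with Head->Prev == Last, defs in front of uses) make both insertion cases in the code that follows O(1). A minimal standalone model of the insertion, with a bare struct in place of MachineOperand:

    struct Node {
      bool IsDef;
      Node *Prev, *Next;
    };

    // Insert N into the list rooted at Head, preserving the invariants.
    static void addToUseList(Node *&Head, Node *N) {
      if (!Head) {              // Empty list: N is head and last at once.
        N->Prev = N;
        N->Next = 0;
        Head = N;
        return;
      }
      Node *Last = Head->Prev;  // Circular Prev chain: Last in O(1).
      Head->Prev = N;           // Correct for both cases below: N becomes
      N->Prev = Last;           // either the new head or the new last node.
      if (N->IsDef) {
        N->Next = Head;         // Defs go in front, so def_iterator can
        Head = N;               // stop at the first use.
      } else {
        N->Next = 0;            // Uses go at the back, keeping the Next
        Last->Next = N;         // chain NULL-terminated.
      }
    }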
+ MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; } } +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { return &*I; } -bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); -} - bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { use_nodbg_iterator UI = use_nodbg_begin(RegNo); if (UI == use_nodbg_end()) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index acb1ee6..076547a 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, } MachineSSAUpdater::~MachineSSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1ce546b..bc383cb 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -99,6 +99,16 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; + + // SuccessorSorter - Sort Successors according to their loop depth. + struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + // We give successors with smaller loop depth higher priority. 
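A minimal illustration of the sort below; the loop depths are invented. std::stable_sort preserves the original successor order among blocks with equal depth, which keeps the sinking decision deterministic:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct DepthLess {
      const unsigned *Depth;    // Depth[i] = loop depth of block i.
      explicit DepthLess(const unsigned *D) : Depth(D) {}
      bool operator()(int L, int R) const { return Depth[L] < Depth[R]; }
    };

    int main() {
      unsigned Depth[] = {2, 0, 1, 0};
      std::vector<int> Succs;
      for (int i = 0; i != 4; ++i)
        Succs.push_back(i);
      std::stable_sort(Succs.begin(), Succs.end(), DepthLess(Depth));
      // Blocks 1 and 3 tie at depth 0 and keep their relative order.
      assert(Succs[0] == 1 && Succs[1] == 3 && Succs[2] == 2 && Succs[3] == 0);
      return 0;
    }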
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..1a3aa60 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1153 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + ItinData = MF->getTarget().getInstrItineraryData(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + BlockInfo.resize(MF->getNumBlockIDs()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. 
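getResources, defined just below, computes per-block counts on demand and caches them behind a sentinel (InstrCount == ~0u). A minimal standalone sketch of that caching idiom, with placeholder types and a made-up "analysis" standing in for the real instruction scan:

#include <cstdio>
#include <vector>

// Per-block info with an "uncomputed" sentinel, as in FixedBlockInfo.
struct Info {
  unsigned InstrCount;
  Info() : InstrCount(~0u) {}
  bool hasResources() const { return InstrCount != ~0u; }
};

// Hypothetical on-demand getter: compute once, hit the cache afterwards.
static const Info *getInfo(std::vector<Info> &Cache, unsigned BlockNum) {
  Info *I = &Cache[BlockNum];
  if (!I->hasResources())
    I->InstrCount = 3 * BlockNum + 1; // placeholder "analysis" result
  return I;
}

int main() {
  std::vector<Info> Cache(4);
  std::printf("%u\n", getInfo(Cache, 2)->InstrCount); // computed: 7
  std::printf("%u\n", getInfo(Cache, 2)->InstrCount); // cached: 7
  return 0;
}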
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+  assert(MBB && "No basic block");
+  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+  if (FBI->hasResources())
+    return FBI;
+
+  // Compute resource usage in the block.
+  // FIXME: Compute per-functional unit counts.
+  FBI->HasCalls = false;
+  unsigned InstrCount = 0;
+  for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    const MachineInstr *MI = I;
+    if (MI->isTransient())
+      continue;
+    ++InstrCount;
+    if (MI->isCall())
+      FBI->HasCalls = true;
+  }
+  FBI->InstrCount = InstrCount;
+  return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+  : MTM(*ct) {
+  BlockInfo.resize(MTM.BlockInfo.size());
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+  return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+  // Compute resources from trace above. The top block is simple.
+  if (!TBI->Pred) {
+    TBI->InstrDepth = 0;
+    TBI->Head = MBB->getNumber();
+    return;
+  }
+
+  // Compute from the block above. A post-order traversal ensures the
+  // predecessor is always computed first.
+  TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+  TBI->Head = PredTBI->Head;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+  // Compute resources for the current block.
+  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+
+  // The trace tail is done.
+  if (!TBI->Succ) {
+    TBI->Tail = MBB->getNumber();
+    return;
+  }
+
+  // Compute from the block below. A post-order traversal ensures the
+  // successor is always computed first.
+  TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+  TBI->InstrHeight += SuccTBI->InstrHeight;
+  TBI->Tail = SuccTBI->Tail;
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
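computeDepthResources and computeHeightResources above are mirror images: InstrDepth accumulates the counts of the blocks strictly above, while InstrHeight includes the block's own count. On a straight-line trace the two recurrences are plain prefix and suffix sums, so InstrDepth + InstrHeight is the same at every block. A sketch with made-up counts:

#include <cstdio>
#include <vector>

int main() {
  // Instruction counts for a hypothetical four-block trace, head to tail.
  unsigned Count[] = { 4, 2, 5, 3 };
  const unsigned N = 4;

  std::vector<unsigned> Depth(N), Height(N);
  Depth[0] = 0;                      // No instructions above the head.
  for (unsigned i = 1; i != N; ++i)  // InstrDepth excludes the block itself.
    Depth[i] = Depth[i - 1] + Count[i - 1];

  Height[N - 1] = Count[N - 1];      // InstrHeight includes the block itself.
  for (unsigned i = N - 1; i != 0; --i)
    Height[i - 1] = Height[i] + Count[i - 1];

  for (unsigned i = 0; i != N; ++i)  // Depth + Height is the whole trace: 14.
    std::printf("block %u: depth=%u height=%u total=%u\n",
                i, Depth[i], Height[i], Depth[i] + Height[i]);
  return 0;
}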
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidHeight() ? TBI : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+  return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+  const char *getName() const { return "MinInstr"; }
+  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+    : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+  if (MBB->pred_empty())
+    return 0;
+  const MachineLoop *CurLoop = getLoopFor(MBB);
+  // Don't leave loops, and never follow back-edges.
+  if (CurLoop && MBB == CurLoop->getHeader())
+    return 0;
+  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+  const MachineBasicBlock *Best = 0;
+  unsigned BestDepth = 0;
+  for (MachineBasicBlock::const_pred_iterator
+       I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+    const MachineBasicBlock *Pred = *I;
+    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+      getDepthResources(Pred);
+    // Ignore cycles that aren't natural loops.
+    if (!PredTBI)
+      continue;
+    // Pick the predecessor that would give this block the smallest InstrDepth.
+    unsigned Depth = PredTBI->InstrDepth + CurCount;
+    if (!Best || Depth < BestDepth)
+      Best = Pred, BestDepth = Depth;
+  }
+  return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+  if (MBB->succ_empty())
+    return 0;
+  const MachineLoop *CurLoop = getLoopFor(MBB);
+  const MachineBasicBlock *Best = 0;
+  unsigned BestHeight = 0;
+  for (MachineBasicBlock::const_succ_iterator
+       I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+    const MachineBasicBlock *Succ = *I;
+    // Don't consider back-edges.
+    if (CurLoop && Succ == CurLoop->getHeader())
+      continue;
+    // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. + switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. 
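computeTrace, coming up next, drives two post-order traversals whose edges are pruned by the insertEdge hook above. The underlying mechanic is a DFS that consults a predicate before following an edge, so pruned regions are never visited at all. The graph, edge filter, and block numbers below are invented for illustration:

#include <cstdio>
#include <set>
#include <vector>

typedef std::vector<std::vector<int> > Graph;

// Post-order DFS that asks ShouldFollow(From, To) before taking an edge,
// in the spirit of the po_iterator_storage specialization above.
static void postOrder(const Graph &G, int From, int To,
                      bool (*ShouldFollow)(int, int),
                      std::set<int> &Seen, std::vector<int> &Out) {
  if (!ShouldFollow(From, To) || !Seen.insert(To).second)
    return;
  for (size_t i = 0; i != G[To].size(); ++i)
    postOrder(G, To, G[To][i], ShouldFollow, Seen, Out);
  Out.push_back(To); // children first: post-order
}

static bool skipBackEdge(int From, int To) {
  return !(From == 3 && To == 1); // prune the hypothetical 3->1 back-edge
}

int main() {
  Graph G(4);                  // 0 -> 1 -> {2, 3}, 3 -> 1 (back-edge)
  G[0].push_back(1); G[1].push_back(2); G[1].push_back(3); G[3].push_back(1);
  std::set<int> Seen;
  std::vector<int> Out;
  postOrder(G, -1, 0, skipBackEdge, Seen, Out);
  for (size_t i = 0; i != Out.size(); ++i)
    std::printf("%d ", Out[i]);  // 2 3 1 0
  std::printf("\n");
  return 0;
}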
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. + TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. 
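Both invalidation loops apply the usual worklist discipline: only neighbors whose preferred link actually routes through the invalidated block are revisited, so unrelated traces keep their cached data. A reduced sketch of the depth direction, with a hypothetical preferred-predecessor array Pref[]:

#include <cstdio>
#include <vector>

int main() {
  // Succ[b] lists successors of block b; Pref[s] is s's preferred
  // predecessor (-1 if none). Both are made-up inputs for illustration.
  std::vector<std::vector<int> > Succ(5);
  Succ[0].push_back(1); Succ[0].push_back(2);
  Succ[2].push_back(3); Succ[2].push_back(4);
  int Pref[5] = { -1, -1, 0, -1, 2 }; // block 2 prefers 0, block 4 prefers 2

  std::vector<bool> Valid(5, true);
  std::vector<int> WorkList;

  Valid[0] = false;          // Block 0 changed; invalidate downwards.
  WorkList.push_back(0);
  while (!WorkList.empty()) {
    int B = WorkList.back();
    WorkList.pop_back();
    for (size_t i = 0; i != Succ[B].size(); ++i) {
      int S = Succ[B][i];
      if (Valid[S] && Pref[S] == B) { // only traces that route through B
        Valid[S] = false;
        WorkList.push_back(S);
      }
    }
  }
  for (int b = 0; b != 5; ++b)  // 0, 2, 4 invalidated; 1, 3 untouched.
    std::printf("block %d %s\n", b, Valid[b] ? "valid" : "invalidated");
  return 0;
}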
+ for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. + assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. + for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. 
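getDataDeps, below, and the DataDep constructor above both lean on the SSA guarantee that a virtual register has exactly one definition, so a use resolves to its def with a single lookup. A toy version of that resolution (DefTable and the register numbers are illustrative, not LLVM's API):

#include <cassert>
#include <cstdio>
#include <map>
#include <utility>

struct Dep { int DefInstr; unsigned DefOp; unsigned UseOp; };

int main() {
  // In SSA form each virtual register has exactly one defining
  // instruction, so a map from register to (instr, operand) suffices.
  std::map<unsigned, std::pair<int, unsigned> > DefTable;
  DefTable[100] = std::make_pair(7, 0u); // vreg 100 defined by instr 7, op 0

  unsigned VReg = 100, UseOp = 2;
  assert(DefTable.count(VReg) && "Register has no defs");
  Dep D = { DefTable[VReg].first, DefTable[VReg].second, UseOp };
  std::printf("use op %u depends on instr %d op %u\n",
              D.UseOp, D.DefInstr, D.DefOp);
  return 0;
}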
+static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. +static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. 
The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. +unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. 
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, + Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << *UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO->readsReg()) + ReadOps.push_back(MO.getOperandNo()); + if (!MO->isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. + if (I->MI) + DepHeight += TII->computeOperandLatency(ItinData, + MI, MO.getOperandNo(), + I->MI, I->Op); + else + // No UseMI. Just use the MI latency instead. + DepHeight += TII->getInstrLatency(ItinData, MI); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); + + // Update Heights[DefMI] to be the maximum height seen. 
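That update is the insert-or-maximize map idiom; the lines below implement it with a DenseMap and tie(). The same shape with standard containers (pushHeight is a hypothetical stand-in):

#include <cstdio>
#include <map>
#include <utility>

typedef std::map<int, unsigned> HeightMap; // stand-in for the real DenseMap

// Hypothetical helper: insert Key with Height, or raise the stored height
// to the maximum seen so far. Returns true the first time Key appears,
// mirroring the contract of pushDepHeight().
static bool pushHeight(HeightMap &Heights, int Key, unsigned Height) {
  std::pair<HeightMap::iterator, bool> R =
      Heights.insert(std::make_pair(Key, Height));
  if (R.second)
    return true;
  if (R.first->second < Height)
    R.first->second = Height;
  return false;
}

int main() {
  HeightMap H;
  std::printf("%d\n", pushHeight(H, 7, 3)); // 1: first sighting of key 7
  std::printf("%d\n", pushHeight(H, 7, 5)); // 0: height raised to 5
  std::printf("%d\n", pushHeight(H, 7, 2)); // 0: 5 is already larger, kept
  std::printf("%u\n", H[7]);                // 5
  return 0;
}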
+ MIHeightMap::iterator I; + bool New; + tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. + if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists +/// of all the blocks in Trace. Stop when reaching the block that contains +/// DefMI. +void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg LI = TBI.LiveIns[i]; + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. + SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + // Get dependencies from PHIs in the trace successor. + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. 
+ if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *PHI = I; + Deps.clear(); + getPHIDeps(PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps.front().DefMI, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.ItinData, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (unsigned i = 0, e = Deps.size(); i != e; ++i) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps[i].DefMI, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg &LIR = TBI.LiveIns[i]; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + // FIXME: Check cache tags, recompute as needed. 
+ computeTrace(MBB); + computeInstrDepths(MBB); + computeInstrHeights(MBB); + return Trace(*this, BlockInfo[MBB->getNumber()]); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, + Dep.DefMI, Dep.DefOp, + PHI, Dep.UseOp, + /* FindMin = */ false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // For now, we compute the resource depth from instruction count / issue + // width. Eventually, we should compute resource depth per functional unit + // and return the max. + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. 
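+  // Worked example with illustrative numbers: InstrDepth = 6 and
+  // InstrHeight = 10, plus one extra block of 4 instructions, gives
+  // Instrs = 20; an issue width of 4 then yields 20 / 4 = 5 cycles, and a
+  // missing schedule model falls back to 20 cycles.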
+ return Instrs; +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h new file mode 100644 index 0000000..c5b86f3 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -0,0 +1,341 @@ +//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the MachineTraceMetrics analysis pass +// that estimates CPU resource usage and critical data dependency paths through +// preferred traces. This is useful for super-scalar CPUs where execution speed +// can be limited both by data dependencies and by limited execution resources. +// +// Out-of-order CPUs will often be executing instructions from multiple basic +// blocks at the same time. This makes it difficult to estimate the resource +// usage accurately in a single basic block. Resources can be estimated better +// by looking at a trace through the current basic block. +// +// For every block, the MachineTraceMetrics pass will pick a preferred trace +// that passes through the block. The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. 
This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions: if the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack: the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+  const MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const InstrItineraryData *ItinData;
+  const MachineRegisterInfo *MRI;
+  const MachineLoopInfo *Loops;
+
+public:
+  class Ensemble;
+  class Trace;
+  static char ID;
+  MachineTraceMetrics();
+  void getAnalysisUsage(AnalysisUsage&) const;
+  bool runOnMachineFunction(MachineFunction&);
+  void releaseMemory();
+  void verifyAnalysis() const;
+
+  friend class Ensemble;
+  friend class Trace;
+
+  /// Per-basic block information that doesn't depend on the trace through the
+  /// block.
+  struct FixedBlockInfo {
+    /// The number of non-trivial instructions in the block.
+    /// Doesn't count PHI and COPY instructions that are likely to be removed.
+    unsigned InstrCount;
+
+    /// True when the block contains calls.
+    bool HasCalls;
+
+    FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+    /// Returns true when resource information for this block has been computed.
+    bool hasResources() const { return InstrCount != ~0u; }
+
+    /// Invalidate resource information.
+    void invalidate() { InstrCount = ~0u; }
+  };
+
+  /// Get the fixed resource information about MBB. Compute it on demand.
+  const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+  /// A virtual register or regunit required by a basic block or its trace
+  /// successors.
+  struct LiveInReg {
+    /// The virtual register required, or a register unit.
+    unsigned Reg;
+
+    /// For virtual registers: Minimum height of the defining instruction.
+    /// For regunits: Height of the highest user in the trace.
+    unsigned Height;
+
+    LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+  };
+
+  /// Per-basic block information that relates to a specific trace through the
+  /// block. Convergent traces mean that only one of these is required per
+  /// block in a trace ensemble.
+  struct TraceBlockInfo {
+    /// Trace predecessor, or NULL for the first block in the trace.
+    /// Valid when hasValidDepth().
+    const MachineBasicBlock *Pred;
+
+    /// Trace successor, or NULL for the last block in the trace.
+    /// Valid when hasValidHeight().
+    const MachineBasicBlock *Succ;
+
+    /// The block number of the head of the trace. (When hasValidDepth()).
+    unsigned Head;
+
+    /// The block number of the tail of the trace. (When hasValidHeight()).
+    unsigned Tail;
+
+    /// Accumulated number of instructions in the trace above this block.
+    /// Does not include instructions in this block.
+    unsigned InstrDepth;
+
+    /// Accumulated number of instructions in the trace below this block.
+    /// Includes instructions in this block.
+    unsigned InstrHeight;
+
+    TraceBlockInfo() :
+      Pred(0), Succ(0),
+      InstrDepth(~0u), InstrHeight(~0u),
+      HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+    /// Returns true if the depth resources have been computed from the trace
+    /// above this block.
+    bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+    /// Returns true if the height resources have been computed from the trace
+    /// below this block.
+    bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+    /// Invalidate depth resources when some block above this one has changed.
+    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+    /// Invalidate height resources when a block below this one has changed.
+    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+    // Data-dependency-related information. Per-instruction depth and height
+    // are computed from data dependencies in the current trace, using
+    // itinerary data.
+
+    /// Instruction depths have been computed. This implies hasValidDepth().
+    bool HasValidInstrDepths;
+
+    /// Instruction heights have been computed. This implies hasValidHeight().
+    bool HasValidInstrHeights;
+
+    /// Critical path length. This is the number of cycles in the longest data
+    /// dependency chain through the trace. This is only valid when both
+    /// HasValidInstrDepths and HasValidInstrHeights are set.
+    unsigned CriticalPath;
+
+    /// Live-in registers. These registers are defined above the current block
+    /// and used by this block or a block below it.
+    /// This does not include PHI uses in the current block, but it does
+    /// include PHI uses in deeper blocks.
+    SmallVector<LiveInReg, 4> LiveIns;
+
+    void print(raw_ostream&) const;
+  };
+
+  /// InstrCycles represents the cycle height and depth of an instruction in a
+  /// trace.
+  struct InstrCycles {
+    /// Earliest issue cycle as determined by data dependencies and instruction
+    /// latencies from the beginning of the trace. Data dependencies from
+    /// before the trace are not included.
+    unsigned Depth;
+
+    /// Minimum number of cycles from when this instruction is issued to the
+    /// end of the trace, as determined by data dependencies and instruction
+    /// latencies.
+    unsigned Height;
+  };
+
+  /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block once. The Trace class serves as a
+  /// handle to internal cached data structures.
+  class Trace {
+    Ensemble &TE;
+    TraceBlockInfo &TBI;
+
+    unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+  public:
+    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+    void print(raw_ostream&) const;
+
+    /// Compute the total number of instructions in the trace.
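+    /// By construction this equals InstrDepth + InstrHeight; with the
+    /// illustrative counts depth = 6 and height = 10 it would return 16.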
+ unsigned getInstrCount() const { + return TBI.InstrDepth + TBI.InstrHeight; + } + + /// Return the resource depth of the top/bottom of the trace center block. + /// This is the number of cycles required to execute all instructions from + /// the trace head to the trace center block. The resource depth only + /// considers execution resources, it ignores data dependencies. + /// When Bottom is set, instructions in the trace center block are included. + unsigned getResourceDepth(bool Bottom) const; + + /// Return the resource length of the trace. This is the number of cycles + /// required to execute the instructions in the trace if they were all + /// independent, exposing the maximum instruction-level parallelism. + /// + /// Any blocks in Extrablocks are included as if they were part of the + /// trace. + unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks = + ArrayRef<const MachineBasicBlock*>()) const; + + /// Return the length of the (data dependency) critical path through the + /// trace. + unsigned getCriticalPath() const { return TBI.CriticalPath; } + + /// Return the depth and height of MI. The depth is only valid for + /// instructions in or above the trace center block. The height is only + /// valid for instructions in or below the trace center block. + InstrCycles getInstrCycles(const MachineInstr *MI) const { + return TE.Cycles.lookup(MI); + } + + /// Return the slack of MI. This is the number of cycles MI can be delayed + /// before the critical path becomes longer. + /// MI must be an instruction in the trace center block. + unsigned getInstrSlack(const MachineInstr *MI) const; + + /// Return the Depth of a PHI instruction in a trace center block successor. + /// The PHI does not have to be part of the trace. + unsigned getPHIDepth(const MachineInstr *PHI) const; + }; + + /// A trace ensemble is a collection of traces selected using the same + /// strategy, for example 'minimum resource height'. There is one trace for + /// every block in the function. + class Ensemble { + SmallVector<TraceBlockInfo, 4> BlockInfo; + DenseMap<const MachineInstr*, InstrCycles> Cycles; + friend class Trace; + + void computeTrace(const MachineBasicBlock*); + void computeDepthResources(const MachineBasicBlock*); + void computeHeightResources(const MachineBasicBlock*); + unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); + void computeInstrDepths(const MachineBasicBlock*); + void computeInstrHeights(const MachineBasicBlock*); + void addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace); + + protected: + MachineTraceMetrics &MTM; + virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; + virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; + explicit Ensemble(MachineTraceMetrics*); + const MachineLoop *getLoopFor(const MachineBasicBlock*) const; + const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; + const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + + public: + virtual ~Ensemble(); + virtual const char *getName() const =0; + void print(raw_ostream&) const; + void invalidate(const MachineBasicBlock *MBB); + void verify() const; + + /// Get the trace that passes through MBB. + /// The trace is computed on demand. + Trace getTrace(const MachineBasicBlock *MBB); + }; + + /// Strategies for selecting traces. + enum Strategy { + /// Select the trace through a block that has the fewest instructions. 
+ TS_MinInstrCount, + + TS_NumStrategies + }; + + /// Get the trace ensemble representing the given trace selection strategy. + /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, + /// and valid for the lifetime of the analysis pass. + Ensemble *getEnsemble(Strategy); + + /// Invalidate cached information about MBB. This must be called *before* MBB + /// is erased, or the CFG is otherwise changed. + /// + /// This invalidates per-block information about resource usage for MBB only, + /// and it invalidates per-trace information for any trace that passes + /// through MBB. + /// + /// Call Ensemble::getTrace() again to update any trace handles. + void invalidate(const MachineBasicBlock *MBB); + +private: + // One entry per basic block, indexed by block number. + SmallVector<FixedBlockInfo, 4> BlockInfo; + + // One ensemble per strategy. + Ensemble* Ensembles[TS_NumStrategies]; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Trace &Tr) { + Tr.print(OS); + return OS; +} + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Ensemble &En) { + En.print(OS); + return OS; +} +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d8dece6..852c169 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -73,8 +73,10 @@ namespace { typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; BitVector regsReserved; BitVector regsAllocatable; @@ -117,6 +119,9 @@ namespace { // block. This set is disjoint from regsLiveOut. RegSet vregsRequired; + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. 
Return true if @@ -203,6 +208,10 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); @@ -212,6 +221,10 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); + void verifyLiveIntervalSegment(const LiveInterval&, + LiveInterval::const_iterator); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getName() - << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")"; + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg, *OS << "\n"; } +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() { regsAllocatable = TRI->getAllocatableSet(*MF); markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. + FunctionBlocks.clear(); + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + FunctionBlocks.insert(I); + BBInfo &MInfo = MBBInfoMap[I]; + + MInfo.Preds.insert(I->pred_begin(), I->pred_end()); + if (MInfo.Preds.size() != I->pred_size()) + report("MBB has duplicate entries in its predecessor list.", I); + + MInfo.Succs.insert(I->succ_begin(), I->succ_end()); + if (MInfo.Succs.size() != I->succ_size()) + report("MBB has duplicate entries in its successor list.", I); + } } // Does iterator point to a and b as the first two elements? @@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); + if (!FunctionBlocks.count(*I)) + report("MBB has successor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Preds.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the predecessor list of the successor BB#" + << (*I)->getNumber() << ".\n"; + } + } + + // Check the predecessor list. 
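+  // This mirrors the successor check above: each predecessor must be a block
+  // of the current function, and MBB must appear in that predecessor's
+  // successor list, or the CFG is inconsistent.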
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
}

const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() != 2) {
+ } if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() != 2) {
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -651,10 +737,10 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];

// The first MCID.NumDefs operands must be explicit register defines
if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
@@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);

+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);

// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (MO->readsReg()) { regsLiveInButUnused.erase(Reg); - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) + if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. @@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { + LiveRangeQuery LRQ(*LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LI << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + } + } } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveRangeQuery LRQ(LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); } - } else { - report("Virtual register has no Live interval", MO, MONum); } } @@ -1124,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() { const LiveInterval &LI = LiveInts->getInterval(Reg); assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) { - VNInfo *VNI = *I; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + // Verify all the cached regunit intervals. 
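// [Editor's sketch] Both the use checks above and the interval checks below
// reduce to live range queries of the form "which value is live at this
// index, and does the range end here?". A toy model over half-open
// segments; hypothetical types, far simpler than the real LiveRangeQuery,
// which also distinguishes register, early-clobber, and dead slots:
#include <vector>

struct ToySegment { unsigned Start, End, ValNo; }; // live on [Start, End)

// Value number live at Idx, or -1 if the range is dead there.
static int valueAt(const std::vector<ToySegment> &LR, unsigned Idx) {
  for (size_t i = 0, e = LR.size(); i != e; ++i)
    if (LR[i].Start <= Idx && Idx < LR[i].End)
      return (int)LR[i].ValNo;
  return -1;
}

// True if some segment ends exactly at Idx, i.e. Idx kills the range.
static bool isKilledAt(const std::vector<ToySegment> &LR, unsigned Idx) {
  for (size_t i = 0, e = LR.size(); i != e; ++i)
    if (LR[i].End == Idx)
      return true;
  return false;
}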
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} - if (!DefVNI) { - if (!VNI->isUnused()) { - report("Valno not live at def and not marked unused", MF); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } - continue; - } +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; - if (VNI->isUnused()) - continue; + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (DefVNI != VNI) { - report("Live range at def has different valno", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; - continue; - } + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); - if (!MBB) { - report("Invalid definition index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } - if (VNI->isPHIDef()) { - if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() - << " in " << LI << '\n'; - } - } else { - // Non-PHI def. - const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); - if (!MI) { - report("No instruction at def index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->regsOverlap(LI.reg, MOI->getReg())) - continue; - } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. 
- if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF); - I->print(*OS); - *OS << " has a valno not in " << LI << '\n'; - } + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. + if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} - if (VNI->isUnused()) { - report("Live range valno is marked unused", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - } +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); - if (!MBB) { - report("Bad start of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } - SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } - const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); - if (!EndMBB) { - report("Bad end of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, 
LI); + *OS << *I << '\n'; + } - // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) - continue; + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) continue; - } + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } - // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", MI); + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); I->print(*OS); *OS << " in " << LI << '\n'; } - - if (I->end.isDead()) { - // Segment ends on the dead slot. - // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - - // A live segment can only end at an early-clobber slot if it is being - // redefined by an early-clobber def. 
- if (I->end.isEarlyClobber()) { - if (I+1 == E || (I+1)->start != I->end) { - report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; } + } + } - // The following checks only apply to virtual registers. Physreg liveness - // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - bool hasRead = false; - bool hasDeadDef = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) - continue; - if (MOI->readsReg()) - hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; - } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { - if (!hasRead) { - report("Instruction ending live range doesn't read the register", - MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - } + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } - // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { - // Not live-in to any blocks. - if (MBB == EndMBB) - continue; - // Skip this block. - ++MFI; + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; } - for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); - // We don't know how to track physregs into a landing pad. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && - MFI->isLandingPad()) { - if (&*MFI == EndMBB) - break; - ++MFI; - continue; - } - // Is VNI a PHI-def in the current block? - bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); - - // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - - // All predecessors must have a live-out value. 
- if (!PVNI) {
- report("Register not marked live out of predecessor", *PI);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << " in " << LI << '\n';
- continue;
- }
-
- // Only PHI-defs can take different predecessor values.
- if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in "
- << PrintReg(Reg) << ": " << LI << '\n';
- }
- }
- if (&*MFI == EndMBB)
- break;
- ++MFI;
- }
- }
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
- // Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF);
- *OS << NumComp << " components in " << LI << '\n';
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
}
}
}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 69d6d00..56526f2 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -88,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -452,7 +456,8 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After Instruction Selection");

// Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");

// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -648,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&MachineLoopInfoID);
addPass(&PHIEliminationID);
}
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
addPass(&TwoAddressInstructionPassID);

if (EnableStrongPHIElim)
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4..9099862 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");

namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -108,12 +110,14 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}

+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads that define a virtual register, and only
+/// when that register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. The check should be repeated when processing uses of the load,
+ // since uses can be removed during the peephole pass.
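// [Editor's sketch] The single-use restriction matters because folding
// deletes the load: a second consumer of the defined vreg would be left
// without a def. A standalone model of the candidate test, with toy types
// rather than the PeepholeOptimizer API:
#include <map>

struct ToyLoad { int DefReg; bool CanFoldAsLoad; };

// UseCounts maps each vreg to the number of instructions reading it.
static bool isToyLoadFoldable(const ToyLoad &L,
                              const std::map<int, unsigned> &UseCounts) {
  if (!L.CanFoldAsLoad)
    return false;
  std::map<int, unsigned>::const_iterator I = UseCounts.find(L.DefReg);
  // Exactly one reader: erasing the load after folding it is then safe.
  return I != UseCounts.end() && I->second == 1;
}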
+ if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + return false; +} + bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { @@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; - bool First = true; - MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { - ++MII; + FoldAsLoadDefReg = 0; continue; } - - if (MI->isBitcast()) { - if (optimizeBitcastInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } - } else if (MI->isCompare()) { - if (optimizeCmpInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; + + if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isSelect() && optimizeSelect(MI))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { @@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } - First = false; - PMII = MII; - ++MII; + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + continue; + } + } } } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8325f20..6b3a48e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { /// its virtual register, and it is guaranteed to be a block-local register. /// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { - // Check for non-debug uses or defs following MO. 
- // This is the most likely way to fail - fast path it. - MachineOperand *Next = &MO; - while ((Next = Next->getNextOperandForReg())) - if (!Next->isDebug()) - return false; - // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. - return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6ac5428..d0cff48 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getFunction()->getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 733312f..9906334 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); + if (BValNo != ValLR->valno) IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the @@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + if (AValNo->isPHIDef() || AValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; + // Kill flags are no longer accurate. They are recomputed after RA. 
+ UseMO.setIsKill(false); if (TargetRegisterInfo::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else @@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // register live range doesn't need to be accurate as long as all the // defs are there. + // Delete the identity copy. + MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + // We don't track kills for reserved registers. MRI->clearKillFlags(CP.getSrcReg()); @@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { ++J; } - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } - // Clear kill flags where live ranges are extended. while (!LHSOldKills.empty()) LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 110f478..9c1dba3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); - // SSA defs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - // - // FIXME: This optimization is disabled pending PR13112. - //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) - // return; + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) + return; // Add output dependence to the next nearest def of this vreg. 
// diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 747bc44..1c485a0 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -228,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -5679,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { @@ -5704,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5724,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold @@ -5756,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + DAG.getNode(ISD::FNEG, dl, VT, N1)); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. 
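// [Editor's note] These combines, including the commuted form implemented
// just below, are algebraic identities: x*y - z == fma(x, y, -z) and
// x - y*z == fma(-y, z, x). They are gated on fast FP fusion because fma
// rounds once where mul+sub round twice. A standalone spot-check with
// libm's fma, using values whose products are exact so both sides agree:
#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = 2.25, z = 0.75; // all products below are exact
  assert(std::fma(x, y, -z) == x * y - z); // (fsub (fmul x, y), z)
  assert(std::fma(-y, z, x) == x - y * z); // (fsub x, (fmul y, z))
  return 0;
}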
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
- return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
- DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
N1.getOperand(0)), N1.getOperand(1), N0);
}
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
}

return SDValue();
@@ -6231,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}

+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -7822,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();

- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If the element type of the input vector is not the same as
+ // the output element type, build the concat_vectors with the input
+ // element type and then bitcast it to the output vector type.
+ //
+ // In other words, avoid nodes like this:
+ // <NODE> v16i8 = concat_vectors v4i16 v4i16
+ // Replace it with this one:
+ // <NODE0> v8i16 = concat_vectors v4i16 v4i16
+ // <NODE1> v16i8 = bitcast NODE0
+ EVT ItemType = VecIn1.getValueType().getVectorElementType();
+ if (ItemType != VT.getVectorElementType()) {
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+ ItemType,
+ VecIn1.getValueType().getVectorNumElements()*2);
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+ } else
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }

// If VecIn2 is unused then change it to undef.
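A quick editorial sanity check of the BUILD_VECTOR widening above: building
a v16i8 result from a v4i16 input now goes through a v8i16 concat_vectors
plus a bitcast, and the bit widths line up on both paths (standalone sketch
with toy constants, not part of the patch):

#include <cassert>

int main() {
  const unsigned InElts = 4, InBits = 16;   // input:  v4i16
  const unsigned OutElts = 16, OutBits = 8; // result: v16i8
  // The combine only fires when the input is exactly half the result width.
  assert(2 * InElts * InBits == OutElts * OutBits);
  // concat_vectors doubles the element count but keeps the element type,
  // so the concat (v8i16) and the bitcast result (v16i8) have equal widths.
  assert((InElts * 2) * InBits == OutElts * OutBits);
  return 0;
}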
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e5ea6e6..683fac6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" @@ -789,6 +790,17 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (const CallInst *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; @@ -1040,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo) +FastISel::FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), @@ -1049,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo) TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()) { + TRI(*TM.getRegisterInfo()), + LibInfo(libInfo) { } FastISel::~FastISel() {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 936c126..4488d27 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -411,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), BA->getTargetFlags())); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), + TI->getOffset(), + TI->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b0776af..908ebb9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -428,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. 
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), @@ -436,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (VT.isFloatingPoint() && LoadedVT != VT) - Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; ChainResult = Chain; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5384576..84e41fc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -61,6 +61,7 @@ namespace llvm { if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b971b69..f4fe892 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -403,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -438,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); break; } case ISD::STORE: { const StoreSDNode *ST = cast<StoreSDNode>(N); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -467,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { const AtomicSDNode *AT = cast<AtomicSDNode>(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); break; } case ISD::VECTOR_SHUFFLE: { @@ -483,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } } // end switch (N->getOpcode()) + + // Target specific memory nodes could also have address spaces to check. 
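// [Editor's note] The AddrSpace additions in this file are about CSE
// correctness: two memory nodes identical except for their address space
// must not be unified by the FoldingSet, so the address space joins the
// hash key. A toy model of the extended key (hypothetical types, not the
// LLVM FoldingSet):
#include <tuple>

struct ToyMemNodeKey {
  unsigned Opcode, RawVTBits, SubclassData, AddrSpace;
  bool operator<(const ToyMemNodeKey &K) const {
    return std::tie(Opcode, RawVTBits, SubclassData, AddrSpace) <
           std::tie(K.Opcode, K.RawVTBits, K.SubclassData, K.AddrSpace);
  }
};
// With AddrSpace in the key, loads from addrspace(0) and addrspace(1) no
// longer compare equal, so an ordered container keeps both nodes distinct.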
+ if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -1100,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1199,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); @@ -2444,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. 
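// [Editor's note] The three constant folds above differ only in rounding
// mode: FCEIL rounds toward +infinity, FTRUNC toward zero, FFLOOR toward
// -infinity. A standalone spot-check of that mapping against the libm
// rounding functions on a negative half-way value:
#include <cassert>
#include <cmath>

int main() {
  assert(std::ceil(-1.5) == -1.0);  // FCEIL  ~ rmTowardPositive
  assert(std::trunc(-1.5) == -1.0); // FTRUNC ~ rmTowardZero
  assert(std::floor(-1.5) == -2.0); // FFLOOR ~ rmTowardNegative
  return 0;
}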
@@ -3901,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -3973,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4029,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4106,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); @@ -4225,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4314,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4381,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4405,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8cbe818..f3cf758 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Update successor info addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); - addSuccessorWithWeight(SwitchBB, CB.FalseBB, 
CB.FalseWeight); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic store"); @@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; + case Intrinsic::floor: + setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5506,6 +5514,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. @@ -5536,150 +5560,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || - (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || - (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType()) { + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } - } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || - (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || - (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) return; - } - } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || - (LibInfo->has(LibFunc::sinf) && Name == "sinf") || - (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) return; - } - } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || - (LibInfo->has(LibFunc::cosf) && Name == "cosf") || - (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) return; - } - } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || - (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || - (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (visitUnaryFloatCall(I, ISD::FSQRT)) return; - } - } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || - (LibInfo->has(LibFunc::floorf) && Name == "floorf") || - (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; - } - } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || - (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || - (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; - } - } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || - (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || - (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) return; - } - } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || - (LibInfo->has(LibFunc::rintf) && Name == "rintf") || - (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) return; - } - } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || - (LibInfo->has(LibFunc::truncf) && Name == "truncf") || - (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; - } - } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || - (LibInfo->has(LibFunc::log2f) && Name == "log2f") || - (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) return; - } - } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || - (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || - (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) return; - } - } else if (Name == "memcmp") { + break; + case LibFunc::memcmp: if (visitMemCmpCall(I)) return; + break; } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d0fde6f..4090002 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -520,6 +520,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9fc225f..13cd011 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: @@ -409,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 287c679..4e5e3ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -979,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo); + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. 
ReversePostOrderTraversal<const Function*> RPOT(&Fn); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index dff9b2c..6820175 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -2464,7 +2464,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; - if (NewCond != Cond) + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 9a751c1..4a2b7ec 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { // Adjust RegAssign if a register assignment is killed at VNI->def. We // want to avoid calculating the live range of the source register if // possible. - AssignI.find(VNI->def.getPrevSlot()); + AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; // If MI doesn't kill the assigned register, just leave it. 
@@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); assert(ParentVNI && "Parent not live at complement def"); @@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; if (!Dom.first || Dom.second == VNI->def) @@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); - VNI->setIsPHIDef(ParentVNI->isPHIDef()); + defValue(RegIdx, ParentVNI, ParentVNI->def); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 43a6ad8..a04ac3f 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -28,15 +28,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Triple.h" using namespace llvm; -// SSPBufferSize - The lower bound for a buffer to be considered for stack -// smashing protection. -static cl::opt<unsigned> -SSPBufferSize("stack-protector-buffer-size", cl::init(8), - cl::desc("Lower bound for a buffer to be considered for " - "stack protection")); - namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -46,7 +41,7 @@ namespace { Function *F; Module *M; - DominatorTree* DT; + DominatorTree *DT; /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. @@ -60,6 +55,11 @@ namespace { /// check fails. BasicBlock *CreateFailBB(); + /// ContainsProtectableArray - Check whether the type either is an array or + /// contains an array of sufficient size so that we need stack protectors + /// for it. + bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. bool RequiresStackProtector() const; @@ -70,8 +70,8 @@ namespace { } StackProtector(const TargetLowering *tli) : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); @@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) { DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; - + return InsertStackProtectors(); } +/// ContainsProtectableArray - Check whether the type either is an array or +/// contains a char array of sufficient size so that we need stack protectors +/// for it. 
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { + if (!Ty) return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const TargetMachine &TM = TLI->getTargetMachine(); + if (!AT->getElementType()->isIntegerTy(8)) { + Triple Trip(TM.getTargetTriple()); + + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + if (InStruct || !Trip.isOSDarwin()) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. + if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT)) + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) return false; + + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + if (ContainsProtectableArray(*I, true)) + return true; + + return false; +} + /// RequiresStackProtector - Check whether or not this function needs a stack /// protector based upon the stack protector level. The heuristic we use is to /// add a guard variable to functions that call alloca, and functions with @@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const { if (!F->hasFnAttr(Attribute::StackProtect)) return false; - const TargetData *TD = TLI->getTargetData(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) - // If an array has more than SSPBufferSize bytes of allocated space, - // then we emit stack protectors. - if (SSPBufferSize <= TD->getTypeAllocSize(AT)) - return true; + if (ContainsProtectableArray(AI->getAllocatedType())) + return true; } } @@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuardSlot = alloca i8* // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // + // PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } BasicBlock &Entry = F->getEntryBlock(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index c6fdc73..5b06195 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; assert(SrcVNI); - SrcVNI->setHasPHIKill(true); continue; } @@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex PHIIndex = LI->getInstructionIndex(PHI); VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); - DestVNI->setIsPHIDef(true); // Prior to PHI elimination, the live ranges of PHIs begin at their defining // instruction. 
After PHI elimination, PHI instructions are replaced by VNs @@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, LI->getVNInfoAllocator()); - CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, DestCopyIndex.getRegSlot(), CopyVNI)); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index a3d6771..ddee6b2 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, } /// Return the default expected latency for a def based on its opcode. -unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData, +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { if (DefMI->mayLoad()) - return ItinData->SchedModel->LoadLatency; + return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) - return ItinData->SchedModel->HighLatency; + return SchedModel->HighLatency; return 1; } @@ -638,7 +638,7 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData, DefMI); + return TII->defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required return -1; @@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } @@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. 
if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e4c0119..aa601af 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); -STATISTIC(NumReMats, "Number of instructions re-materialized"); -STATISTIC(NumDeletes, "Number of dead instructions deleted"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,16 +94,9 @@ namespace { unsigned Reg, MachineBasicBlock::iterator OldPos); - bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc); - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, unsigned &LastDef); - MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, - unsigned Dist); - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -117,14 +112,6 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist); - typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; - bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist); - bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI, MachineBasicBlock *MBB); @@ -150,6 +137,11 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part @@ -167,6 +159,8 @@ namespace { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); AU.addPreservedID(MachineLoopInfoID); 
AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->isTerminator()) + KillMI == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, } } } + assert(KillMO && "Didn't find kill"); // Update kill and LV information. KillMO->setIsKill(false); @@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MBB->remove(MI); MBB->insert(KillPos, MI); + if (LIS) + LIS->handleMove(MI); + ++Num3AddrSunk; return true; } -/// isTwoAddrUse - Return true if the specified MI is using the specified -/// register as a two-address operand. -static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const MCInstrDesc &MCID = UseMI->getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && - (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) - // Earlier use is a two-address one. - return true; - } - return false; -} - -/// isProfitableToReMat - Return true if the heuristics determines it is likely -/// to be profitable to re-materialize the definition of Reg rather than copy -/// the register. -bool -TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, - const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc) { - bool OtherUse = false; - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = UseMO.getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end() && DI->second == Loc) - continue; // Current use. - OtherUse = true; - // There is at least one other use in the MBB that will clobber the - // register. - if (isTwoAddrUse(UseMI, Reg)) - return true; - } - } - - // If other uses in MBB are not two-address uses, then don't remat. - if (OtherUse) - return false; - - // No other uses in the same block, remat if it's defined in the same - // block so it does not unnecessarily extend the live range. - return MBB == DefMI->getParent(); -} - /// NoUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. 
It also returns the last @@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, return !(LastUse > LastDef && LastUse < Dist); } -MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, - MachineBasicBlock *MBB, - unsigned Dist) { - unsigned LastUseDist = 0; - MachineInstr *LastUse = 0; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB || MI->isDebugValue()) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); - if (DI == DistanceMap.end()) - continue; - if (DI->second >= Dist) - continue; - - if (MO.isUse() && DI->second > LastUseDist) { - LastUse = DI->first; - LastUseDist = DI->second; - } - } - return LastUse; -} - /// isCopyToReg - Return true if the specified MI is a copy instruction or /// a extract_subreg instruction. It also returns the source and destination /// registers and whether they are physical registers by reference. @@ -538,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToReMat - Return true if it's potentially profitable to commute +/// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, @@ -628,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); + if (Indexes) + Indexes->replaceMachineInstrInMaps(MI, NewMI); mbbi->insert(mi, NewMI); // Insert the new inst mbbi->erase(mi); // Nuke the old inst. @@ -676,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't // uses RegB, convertToThreeAddress must have created more @@ -785,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, return; } -/// isSafeToDelete - If the specified instruction does not produce any side -/// effects and all of its defs are dead, then it's safe to delete. -static bool isSafeToDelete(MachineInstr *MI, - const TargetInstrInfo *TII, - SmallVector<unsigned, 4> &Kills) { - if (MI->mayStore() || MI->isCall()) - return false; - if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - Kills.push_back(MO.getReg()); - } - return true; -} - -/// canUpdateDeletedKills - Check if all the registers listed in Kills are -/// killed by instructions in MBB preceding the current instruction at -/// position Dist. If so, return true and record information about the -/// preceding kills in NewKills. 
-bool TwoAddressInstructionPass:: -canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist) { - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) - return false; - - MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); - if (!LastKill) - return false; - - bool isModRef = LastKill->definesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), - LastKill)); - } - return true; -} - -/// DeleteUnusedInstr - If an instruction with a tied register operand can -/// be safely deleted, just delete it. -bool -TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned Dist) { - // Check if the instruction has no side effects and if all its defs are dead. - SmallVector<unsigned, 4> Kills; - if (!isSafeToDelete(mi, TII, Kills)) - return false; - - // If this instruction kills some virtual registers, we need to - // update the kill information. If it's not possible to do so, - // then bail out. - SmallVector<NewKill, 4> NewKills; - if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) - return false; - - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - return true; -} - /// RescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg', consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. @@ -1000,6 +843,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, // Update live variables LV->removeVirtualRegisterKilled(Reg, KillMI); LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(MI); DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; @@ -1154,6 +999,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, // Update live variables LV->removeVirtualRegisterKilled(Reg, KillMI); LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(KillMI); DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; @@ -1180,16 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. bool regBKilled = isKilled(MI, regB, MRI, TII); - if (!regBKilled && MI.getOperand(DstIdx).isDead() && - DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { - ++NumDeletes; - DEBUG(dbgs() << "\tdeleted unused instruction.\n"); - return true; // Done with this instruction." - } if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); @@ -1273,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (NewOpc != 0) { const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); if (UnfoldMCID.getNumDefs() == 1) { - MachineFunction &MF = *mbbi->getParent(); - // Unfold the load. 
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF)); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, Reg, + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -1359,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands were found, including the trivial ones. +bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->isInlineAsm() ? + MI->getNumOperands() : MCID.getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. 
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + SlotIndex CopyIdx; + if (Indexes) + CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. + if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - MRI = &MF.getRegInfo(); +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); + Indexes = getAnalysisIfAvailable<SlotIndexes>(); LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); OptLevel = TM.getOptLevel(); @@ -1375,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << MF->getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); - // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
- BitVector ReMatRegs(MRI->getNumVirtRegs()); - - typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > - TiedOperandMap; - TiedOperandMap TiedOperands(4); + TiedOperandMap TiedOperands; SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); @@ -1407,50 +1400,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const MCInstrDesc &MCID = mi->getDesc(); - bool FirstTied = true; - DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = mi->isInlineAsm() - ? mi->getNumOperands() : MCID.getNumOperands(); - for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { - unsigned DstIdx = 0; - if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) - continue; - - if (FirstTied) { - FirstTied = false; - ++NumTwoAddressInstrs; - DEBUG(dbgs() << '\t' << *mi); - } - - assert(mi->getOperand(SrcIdx).isReg() && - mi->getOperand(SrcIdx).getReg() && - mi->getOperand(SrcIdx).isUse() && - "two address instruction invalid"); - - unsigned regB = mi->getOperand(SrcIdx).getReg(); - - // Deal with <undef> uses immediately - simply rewrite the src operand. - if (mi->getOperand(SrcIdx).isUndef()) { - unsigned DstReg = mi->getOperand(DstIdx).getReg(); - // Constrain the DstReg register class if required. - if (TargetRegisterInfo::isVirtualRegister(DstReg)) - if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, - TRI, MF)) - MRI->constrainRegClass(DstReg, RC); - mi->getOperand(SrcIdx).setReg(DstReg); - DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi); - continue; - } - TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; } + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + // If the instruction has a single pair of tied operands, try some // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. @@ -1477,139 +1441,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // Now iterate over the information collected above. for (TiedOperandMap::iterator OI = TiedOperands.begin(), OE = TiedOperands.end(); OI != OE; ++OI) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; - - bool IsEarlyClobber = false; - bool RemovedKillFlag = false; - bool AllUsesCopied = true; - unsigned LastCopiedReg = 0; - unsigned regB = OI->first; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; - - const MachineOperand &DstMO = mi->getOperand(DstIdx); - unsigned regA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); - - // Grab regB from the instruction because it may have changed if the - // instruction was commuted. - regB = mi->getOperand(SrcIdx).getReg(); - - if (regA == regB) { - // The register is tied to multiple destinations (or else we would - // not have continued this far), but this use of the register - // already matches the tied destination. Leave it. 
- AllUsesCopied = false; - continue; - } - LastCopiedReg = regA; - - assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); - -#ifndef NDEBUG - // First, verify that we don't have a use of "a" in the instruction - // (a = b + a for example) because our transformation will not - // work. This should never occur because we are in SSA form. - for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == DstIdx || - !mi->getOperand(i).isReg() || - mi->getOperand(i).getReg() != regA); -#endif - - // Emit a copy or rematerialize the definition. - bool isCopy = false; - const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getUniqueVRegDef(regB); - // If it's safe and profitable, remat the definition instead of - // copying it. - if (DefMI && - DefMI->isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, AA, regB) && - isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); - unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); - ++NumReMats; - } else { - BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), - regA).addReg(regB); - isCopy = true; - } - - // Update DistanceMap. - MachineBasicBlock::iterator prevMI = prior(mi); - DistanceMap.insert(std::make_pair(prevMI, Dist)); - DistanceMap[mi] = ++Dist; - - DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); - - MachineOperand &MO = mi->getOperand(SrcIdx); - assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && - "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - - // Make sure regA is a legal regclass for the SrcIdx operand. - if (TargetRegisterInfo::isVirtualRegister(regA) && - TargetRegisterInfo::isVirtualRegister(regB)) - MRI->constrainRegClass(regA, MRI->getRegClass(regB)); - - MO.setReg(regA); - - if (isCopy) - // Propagate SrcRegMap. - SrcRegMap[regA] = regB; - } - - if (AllUsesCopied) { - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - } - } - } - - // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) - LV->addVirtualRegisterKilled(regB, prior(mi)); - - } else if (RemovedKillFlag) { - // Some tied uses of regB matched their destination registers, so - // regB is still used in this instruction, but a kill flag was - // removed from a different tied use of regB, so now we need to add - // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - MO.setIsKill(true); - break; - } - } - } - - // We didn't change anything if there was a single tied pair, and that - // pair didn't require copies. - if (AllUsesCopied || TiedPairs.size() > 1) { - MadeChange = true; - - // Schedule the source copy / remat inserted to form two-address - // instruction. FIXME: Does it matter the distance map may not be - // accurate after it's scheduled? 
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); - } - + processTiedPairs(mi, OI->second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } @@ -1634,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } - // Some remat'ed instructions are dead. - for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(i); - if (MRI->use_nodbg_empty(VReg)) { - MachineInstr *DefMI = MRI->getVRegDef(VReg); - DefMI->eraseFromParent(); - } - } - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve // SSA form. It's now safe to de-SSA. MadeChange |= EliminateRegSequences();
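A note on the TwoAddressInstructionPass hunks above: the per-register tied-pair handling moves out of runOnMachineFunction and into collectTiedOperands/processTiedPairs, and the rematerialization path is dropped, but the core rewrite is unchanged: when a use operand is tied to a def and the two registers differ, a COPY is inserted before the instruction and the tied use is rewritten to the def register. A minimal standalone sketch of that rewrite, where Inst and rewriteTiedPair are illustrative stand-ins and not LLVM API:

#include <cassert>
#include <string>
#include <vector>

// Toy instruction: one def and a list of use registers; Uses[0] is tied to Def.
struct Inst {
  std::string Op;
  std::string Def;
  std::vector<std::string> Uses;
};

// Hypothetical stand-in for the processTiedPairs rewrite: satisfy the tied
// constraint "Uses[0] == Def" by prepending a copy and renaming the tied use.
static void rewriteTiedPair(std::vector<Inst> &Block, size_t I) {
  Inst &MI = Block[I];
  if (MI.Uses.empty() || MI.Uses[0] == MI.Def)
    return;                                  // constraint already satisfied
  Inst Copy{"COPY", MI.Def, {MI.Uses[0]}};   // %a = COPY %b
  MI.Uses[0] = MI.Def;                       // tied use now reads %a
  Block.insert(Block.begin() + I, Copy);     // copy goes before the instruction
}

int main() {
  // %a = ADD %b, %c   with %a tied to %b
  std::vector<Inst> Block{{"ADD", "a", {"b", "c"}}};
  rewriteTiedPair(Block, 0);
  // becomes:   %a = COPY %b ;  %a = ADD %a, %c
  assert(Block.size() == 2 && Block[0].Op == "COPY" &&
         Block[1].Uses[0] == "a");
  return 0;
}

The real pass additionally updates kill flags, DistanceMap, SlotIndexes, and register-class constraints around the inserted COPY, which is exactly what the added processTiedPairs body does.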

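Separately, the SelectionDAG.cpp hunks repeatedly add ID.AddInteger(MMO->getPointerInfo().getAddrSpace()) when building a node's FoldingSetNodeID. Without the address space in the key, two memory operations that are identical except for their address space could be CSE'd into a single node. A minimal sketch of the effect, using a toy key type in place of FoldingSetNodeID; the hash mixing and the opcode/type encodings here are illustrative only:

#include <cassert>

// Toy stand-in for FoldingSetNodeID: order-sensitive integer mixing.
struct NodeID {
  unsigned long long Hash = 0;
  void AddInteger(unsigned V) { Hash = Hash * 1000003ULL + V + 1; }
};

int main() {
  const unsigned Opcode = 100, RawVT = 7;   // hypothetical encodings
  NodeID A, B;
  A.AddInteger(Opcode); A.AddInteger(RawVT);
  B.AddInteger(Opcode); B.AddInteger(RawVT);
  assert(A.Hash == B.Hash);                 // without address space: keys collide

  A.AddInteger(/*AddrSpace=*/0);            // operation on address space 0
  B.AddInteger(/*AddrSpace=*/1);            // operation on address space 1
  assert(A.Hash != B.Hash);                 // keys now distinguish the nodes
  return 0;
}

This is why the same one-line addition appears in getAtomic, getMemIntrinsicNode, getLoad, getStore, getTruncStore, and getIndexedStore: every CSE'd memory node needs the address space folded into its identity.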