diff options
Diffstat (limited to 'lib/CodeGen')
44 files changed, 1821 insertions, 1676 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d5926f9..dd3fb3b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1864,13 +1864,12 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + const TargetData *TD = AP.TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(CV->getType()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); - } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: @@ -1891,7 +1890,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } @@ -1905,20 +1903,28 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) return EmitGlobalConstantStruct(CVS, AddrSpace, AP); - // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of - // vectors). - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). if (CE->getOpcode() == Instruction::BitCast) return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + return EmitGlobalConstantImpl(New, AddrSpace, AP); + } + } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), - AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. @@ -2102,27 +2108,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { OutStreamer.EmitLabel(Syms[i]); } + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) + OutStreamer.AddComment("%" + BB->getName()); + EmitBasicBlockLoopComments(*MBB, LI, *this); + } + // Print the main label for the block. if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (isVerbose() && OutStreamer.hasRawTextSupport()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - - EmitBasicBlockLoopComments(*MBB, LI, *this); - // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); } } else { - if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); - } - OutStreamer.EmitLabel(MBB->getSymbol()); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 51c635e..3b383f6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -156,13 +156,12 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. if (!SP.Verify()) return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; + // If the line number is 0, don't add it. unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) + if (Line == 0) return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); assert(FileID && "Invalid file id"); @@ -178,7 +177,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) + if (Line == 0) return; unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); @@ -870,11 +869,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, @@ -922,6 +916,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add source line info if available. if (!CTy.isForwardDecl()) addSourceLine(&Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); } } @@ -1006,6 +1006,9 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); + // Unfortunately this code needs to stay here to work around + // a bug in older gdbs that requires the linkage name to resolve + // multiple template functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index fa62169..388cef4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -523,20 +523,19 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, DirName = ""; unsigned SrcId = SourceIdMap.size()+1; - std::pair<std::string, std::string> SourceName = - std::make_pair(FileName, DirName); - std::pair<std::pair<std::string, std::string>, unsigned> Entry = - make_pair(SourceName, SrcId); - std::map<std::pair<std::string, std::string>, unsigned>::iterator I; - bool NewlyInserted; - llvm::tie(I, NewlyInserted) = SourceIdMap.insert(Entry); - if (!NewlyInserted) - return I->second; + // We look up the file/dir pair by concatenating them with a zero byte. + SmallString<128> NamePair; + NamePair += DirName; + NamePair += '\0'; // Zero bytes are not allowed in paths. + NamePair += FileName; + + StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); + if (Ent.getValue() != SrcId) + return Ent.getValue(); // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.first.second, - Entry.first.first); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); return SrcId; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 8b802d2..83f30f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -26,7 +26,6 @@ #include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLoc.h" -#include <map> namespace llvm { @@ -209,9 +208,9 @@ class DwarfDebug { /// std::vector<DIEAbbrev *> Abbreviations; - /// SourceIdMap - Source id map, i.e. pair of source filename and directory - /// mapped to a unique id. - std::map<std::pair<std::string, std::string>, unsigned> SourceIdMap; + /// SourceIdMap - Source id map, i.e. pair of source filename and directory, + /// separated by a zero byte, mapped to a unique id. + StringMap<unsigned> SourceIdMap; /// StringPool - A String->Symbol mapping of strings used by indirect /// references. diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 59c92b3..f57f4a8 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1019,12 +1019,27 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, return MBB2I->isCall() && !MBB1I->isCall(); } +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless its absent, in which case use the +/// DebugLoc of the second if present. +static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); +} + /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); - DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1073,6 +1088,7 @@ ReoptimizeBlock: // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) @@ -1130,6 +1146,7 @@ ReoptimizeBlock: // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; @@ -1143,6 +1160,7 @@ ReoptimizeBlock: if (PriorTBB == MBB) { SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; @@ -1180,6 +1198,7 @@ ReoptimizeBlock: DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); @@ -1209,6 +1228,7 @@ ReoptimizeBlock: if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector<MachineOperand, 4> NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; @@ -1222,6 +1242,7 @@ ReoptimizeBlock: if (CurTBB && CurCond.empty() && CurFBB == 0 && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. @@ -1264,8 +1285,9 @@ ReoptimizeBlock: assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1289,9 +1311,10 @@ ReoptimizeBlock: bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1351,7 +1374,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 0362365..21729cd 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -80,7 +80,6 @@ add_llvm_library(LLVMCodeGen RegisterScavenging.cpp RenderMachineFunction.cpp ScheduleDAG.cpp - ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index c684cdc..bad5010 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0) {} + DefIndices(TRI->getNumRegs(), 0), + KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { } // Clear "do not change" set. - KeepRegs.clear(); + KeepRegs.reset(); - bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); + bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); // Determine the live-out physregs for this block. if (IsReturnBlock) { @@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -106,14 +107,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void CriticalAntiDepBreaker::FinishBlock() { RegRefs.clear(); - KeepRegs.clear(); + KeepRegs.reset(); } void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { RegRefs.insert(std::make_pair(Reg, &MO)); if (MO.isUse() && Special) { - if (KeepRegs.insert(Reg)) { + if (!KeepRegs.test(Reg)) { + KeepRegs.set(Reg); for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) - KeepRegs.insert(*Subreg); + KeepRegs.set(*Subreg); } } } @@ -259,7 +261,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (MO.clobbersPhysReg(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; - KeepRegs.erase(i); + KeepRegs.reset(i); Classes[i] = 0; RegRefs.erase(i); } @@ -276,7 +278,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); + KeepRegs.reset(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. @@ -285,7 +287,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); + KeepRegs.reset(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } @@ -551,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; - else if (KeepRegs.count(AntiDepReg)) + else if (KeepRegs.test(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 0710780..7746259 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { @@ -66,7 +65,7 @@ class TargetRegisterInfo; /// KeepRegs - A set of registers which are live and cannot be changed to /// break anti-dependencies. - SmallSet<unsigned, 4> KeepRegs; + BitVector KeepRegs; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index f0cf290..5ff641c 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,10 +23,10 @@ // //===----------------------------------------------------------------------===// -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" using namespace llvm; @@ -103,15 +103,12 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { namespace { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. -// -// ScheduleDAGInstrs has LLVM_LIBRARY_VISIBILITY so we have to reference it as -// an opaque pointer in VLIWPacketizerList. class DefaultVLIWScheduler : public ScheduleDAGInstrs { public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. - void Schedule(); + void schedule(); }; } // end anonymous namespace @@ -121,9 +118,9 @@ DefaultVLIWScheduler::DefaultVLIWScheduler( ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { } -void DefaultVLIWScheduler::Schedule() { +void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. - BuildSchedGraph(0); + buildSchedGraph(0); } // VLIWPacketizerList Ctor @@ -137,7 +134,7 @@ VLIWPacketizerList::VLIWPacketizerList( // VLIWPacketizerList Dtor VLIWPacketizerList::~VLIWPacketizerList() { - delete (DefaultVLIWScheduler *)SchedulerImpl; + delete SchedulerImpl; delete ResourceTracker; } @@ -184,18 +181,15 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr) { - DefaultVLIWScheduler *Scheduler = (DefaultVLIWScheduler *)SchedulerImpl; - Scheduler->Run(MBB, BeginItr, EndItr, MBB->size()); + assert(MBB->end() == EndItr && "Bad EndIndex"); - // Remember scheduling units. - SUnits = Scheduler->SUnits; + SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size()); - // Generate MI -> SU map. - std::map <MachineInstr*, SUnit*> MIToSUnit; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - MIToSUnit[SU->getInstr()] = SU; - } + // Build the DAG without reordering instructions. + SchedulerImpl->schedule(); + + // Remember scheduling units. + SUnits = SchedulerImpl->SUnits; // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { @@ -211,7 +205,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, continue; } - SUnit *SUI = MIToSUnit[MI]; + SUnit *SUI = SchedulerImpl->getSUnit(MI); assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. @@ -221,7 +215,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); VI != VE; ++VI) { MachineInstr *MJ = *VI; - SUnit *SUJ = MIToSUnit[MJ]; + SUnit *SUJ = SchedulerImpl->getSUnit(MJ); assert(SUJ && "Missing SUnit Info!"); // Is it legal to packetize SUI and SUJ together. @@ -245,4 +239,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, // End any packet left behind. endPacket(MBB, EndItr); + + SchedulerImpl->exitRegion(); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 17633e2..97e6547 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -90,7 +90,7 @@ static void addPassesToHandleExceptions(TargetMachine *TM, // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. - PM.add(createSjLjEHPass(TM->getTargetLowering())); + PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 0578229..deab05a 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) { } -// ScheduledNode - As nodes are scheduled, we look to see if there are any +// scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. -void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { AdjustPriorityOfUnscheduledPreds(I->getSUnit()); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index c35302a..2187833 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -226,7 +226,7 @@ public: LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + UserValueScopes &UVS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); do { @@ -620,7 +620,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -841,7 +841,7 @@ bool UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can - // safely erase unuused locations. + // safely erase unused locations. for (unsigned i = locations.size(); i ; --i) { unsigned LocNo = i-1; const MachineOperand *Loc = &locations[LocNo]; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 70ed1c3..3ade660 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1049,9 +1049,19 @@ public: bool hasRegMaskOp = false; collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - moveAllEnteringFrom(OldIdx, Entering); - moveAllInternalFrom(OldIdx, Internal); - moveAllExitingFrom(OldIdx, Exiting); + // To keep the LiveRanges valid within an interval, move the ranges closest + // to the destination first. This prevents ranges from overlapping, to that + // APIs like removeRange still work. + if (NewIdx < OldIdx) { + moveAllEnteringFrom(OldIdx, Entering); + moveAllInternalFrom(OldIdx, Internal); + moveAllExitingFrom(OldIdx, Exiting); + } + else { + moveAllExitingFrom(OldIdx, Exiting); + moveAllInternalFrom(OldIdx, Internal); + moveAllEnteringFrom(OldIdx, Entering); + } if (hasRegMaskOp) updateRegMaskSlots(OldIdx); @@ -1319,8 +1329,14 @@ private: void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { LiveInterval* LI = P.first; LiveRange* LR = P.second; + // Extend the LiveRange if NewIdx is past the end. if (NewIdx > LR->end) { - moveKillFlags(LI->reg, LR->end, NewIdx); + // Move kill flags if OldIdx was not originally the end + // (otherwise LR->end points to an invalid slot). + if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { + assert(LR->end > OldIdx && "LiveRange does not cover original slot"); + moveKillFlags(LI->reg, LR->end, NewIdx); + } LR->end = NewIdx.getRegSlot(); } } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 9c3d255..48e1e4c 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -109,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); + assert(MBB != &MF->front() && "Can't find reaching def for virtreg"); WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 611b045..ca8a8e8 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -238,6 +238,18 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } +/// Return a hopefully unique identifier for this block. +std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getFunction()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += (Twine("BB") + Twine(getNumber())).str(); + return Name; +} + void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e9f9475..43af1ad 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -481,7 +482,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MCID NULL and no operands. MachineInstr::MachineInstr() : MCID(0), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), + NumMemRefs(0), MemRefs(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -489,10 +490,10 @@ MachineInstr::MachineInstr() void MachineInstr::addImplicitDefUseOperands() { if (MCID->ImplicitDefs) - for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) + for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) + for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -501,7 +502,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// the MCInstrDesc. MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -516,7 +517,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -532,7 +533,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -548,7 +549,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -563,7 +564,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), - MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -738,28 +739,23 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { mmo_iterator OldMemRefs = MemRefs; - mmo_iterator OldMemRefsEnd = MemRefsEnd; + uint16_t OldNumMemRefs = NumMemRefs; - size_t NewNum = (MemRefsEnd - MemRefs) + 1; + uint16_t NewNum = NumMemRefs + 1; mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); - mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; - std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); NewMemRefs[NewNum - 1] = MO; MemRefs = NewMemRefs; - MemRefsEnd = NewMemRefsEnd; + NumMemRefs = NewNum; } -bool -MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const { - if (Type == IgnoreBundle || !isBundle()) - return getDesc().getFlags() & (1 << MCFlag); - +bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { const MachineBasicBlock *MBB = getParent(); MachineBasicBlock::const_instr_iterator MII = *this; ++MII; while (MII != MBB->end() && MII->isInsideBundle()) { - if (MII->getDesc().getFlags() & (1 << MCFlag)) { + if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; } else { @@ -1843,49 +1839,55 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { - unsigned Hash = MI->getOpcode() * 37; + // Build up a buffer of hash code components. + // + // FIXME: This is a total hack. We should have a hash_value overload for + // MachineOperand, but currently that doesn't work because there are many + // different ideas of "equality" and thus different sets of information that + // contribute to the hash code. This one happens to want to take a specific + // subset. And it's still not clear that this routine uses the *correct* + // subset of information when computing the hash code. The goal is to use the + // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to + // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would + // be very useful to factor the selection of relevant inputs out of the two + // functions and into a common routine, but it's not clear how that can be + // done. + SmallVector<size_t, 8> HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - uint64_t Key = (uint64_t)MO.getType() << 32; switch (MO.getType()) { default: break; case MachineOperand::MO_Register: if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. - Key |= MO.getReg(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); break; case MachineOperand::MO_Immediate: - Key |= MO.getImm(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); break; case MachineOperand::MO_FrameIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_JumpTableIndex: - Key |= MO.getIndex(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); break; case MachineOperand::MO_MachineBasicBlock: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); break; case MachineOperand::MO_GlobalAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); break; case MachineOperand::MO_BlockAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + HashComponents.push_back(hash_combine(MO.getType(), + MO.getBlockAddress())); break; case MachineOperand::MO_MCSymbol: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); break; } - Key += ~(Key << 32); - Key ^= (Key >> 22); - Key += ~(Key << 13); - Key ^= (Key >> 8); - Key += (Key << 3); - Key ^= (Key >> 15); - Key += ~(Key << 27); - Key ^= (Key >> 31); - Hash = (unsigned)Key + Hash * 37; - } - return Hash; + } + return hash_combine_range(HashComponents.begin(), HashComponents.end()); } void MachineInstr::emitError(StringRef Msg) const { diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index d1f2df9..73489a7 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -229,6 +229,8 @@ bool llvm::finalizeBundles(MachineFunction &MF) { "First instr cannot be inside bundle before finalization!"); MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; for (++MII; MII != MIE; ) { if (!MII->isInsideBundle()) ++MII; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 7d40e66..f140dec 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -161,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - if (!def_empty(Reg)) - return &*def_begin(Reg); - return 0; + def_iterator I = def_begin(Reg); + return !I.atEnd() ? &*I : 0; } bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 8a485e0..364a244 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "misched" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachinePassRegistry.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" @@ -25,25 +25,36 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" #include <queue> using namespace llvm; +static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// namespace { /// MachineScheduler runs after coalescing and before register allocation. -class MachineScheduler : public MachineFunctionPass { +class MachineScheduler : public MachineSchedContext, + public MachineFunctionPass { public: - MachineFunction *MF; - const TargetInstrInfo *TII; - const MachineLoopInfo *MLI; - const MachineDominatorTree *MDT; - LiveIntervals *LIS; - MachineScheduler(); virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -71,7 +82,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() -: MachineFunctionPass(ID), MF(0), MLI(0), MDT(0) { +: MachineFunctionPass(ID) { initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -80,7 +91,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(MachineDominatorsID); AU.addRequired<MachineLoopInfo>(); AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveIntervals>(); @@ -88,91 +99,226 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -namespace { -/// MachineSchedRegistry provides a selection of available machine instruction -/// schedulers. -class MachineSchedRegistry : public MachinePassRegistryNode { -public: - typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineScheduler *); +MachinePassRegistry MachineSchedRegistry::Registry; - // RegisterPassParser requires a (misnamed) FunctionPassCtor type. - typedef ScheduleDAGCtor FunctionPassCtor; +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. +static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return 0; +} - static MachinePassRegistry Registry; +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, + RegisterPassParser<MachineSchedRegistry> > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); - MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) - : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { - Registry.Add(this); - } - ~MachineSchedRegistry() { Registry.Remove(this); } +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Top-level MachineScheduler pass driver. +/// +/// Visit blocks in function order. Divide each block into scheduling regions +/// and visit them bottom-up. Visiting regions bottom-up is not required, but is +/// consistent with the DAG builder, which traverses the interior of the +/// scheduling regions bottom-up. +/// +/// This design avoids exposing scheduling boundaries to the DAG builder, +/// simplifying the DAG builder's support for "special" target instructions. +/// At the same time the design allows target schedulers to operate across +/// scheduling boundaries, for example to bundle the boudary instructions +/// without reordering them. This creates complexity, because the target +/// scheduler must update the RegionBegin and RegionEnd positions cached by +/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler +/// design would be to split blocks at scheduling boundaries, but LLVM has a +/// general bias against block splitting purely for implementation simplicity. +bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + // Initialize the context of the pass. + MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AliasAnalysis>(); - // Accessors. - // - MachineSchedRegistry *getNext() const { - return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); - } - static MachineSchedRegistry *getList() { - return (MachineSchedRegistry *)Registry.getList(); - } - static ScheduleDAGCtor getDefault() { - return (ScheduleDAGCtor)Registry.getDefault(); - } - static void setDefault(ScheduleDAGCtor C) { - Registry.setDefault((MachinePassCtor)C); + LIS = &getAnalysis<LiveIntervals>(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor == useDefaultMachineSched) { + // Get the default scheduler set by the target. + Ctor = MachineSchedRegistry::getDefault(); + if (!Ctor) { + Ctor = createConvergingSched; + MachineSchedRegistry::setDefault(Ctor); + } } - static void setListener(MachinePassRegistryListener *L) { - Registry.setListener(L); + // Instantiate the selected scheduler. + OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + + // Visit all machine basic blocks. + for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); + MBB != MBBEnd; ++MBB) { + + Scheduler->startBlock(MBB); + + // Break the block into scheduling regions [I, RegionEnd), and schedule each + // region as soon as it is discovered. RegionEnd points the the scheduling + // boundary at the bottom of the region. The DAG does not include RegionEnd, + // but the region does (i.e. the next RegionEnd is above the previous + // RegionBegin). If the current block has no terminator then RegionEnd == + // MBB->end() for the bottom region. + // + // The Scheduler may insert instructions during either schedule() or + // exitRegion(), even for empty regions. So the local iterators 'I' and + // 'RegionEnd' are invalid across these calls. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() + || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + --RegionEnd; + // Count the boundary instruction. + --RemainingCount; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) + break; + } + // Notify the scheduler of the region, even if we may skip scheduling + // it. Perhaps it still needs to be bundled. + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + // Close the current region. Bundle the terminator if needed. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->exitRegion(); + continue; + } + DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() + << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " Remaining: " << RemainingCount << "\n"); + + // Schedule a region: possibly reorder instructions. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->schedule(); + + // Close the current region. + Scheduler->exitRegion(); + + // Scheduling has invalidated the current iterator 'I'. Ask the + // scheduler for the top of it's scheduled region. + RegionEnd = Scheduler->begin(); + } + assert(RemainingCount == 0 && "Instruction count mismatch!"); + Scheduler->finishBlock(); } -}; -} // namespace + DEBUG(LIS->print(dbgs())); + return true; +} -MachinePassRegistry MachineSchedRegistry::Registry; +void MachineScheduler::print(raw_ostream &O, const Module* m) const { + // unimplemented +} -static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P); +//===----------------------------------------------------------------------===// +// MachineSchedStrategy - Interface to a machine scheduling algorithm. +//===----------------------------------------------------------------------===// -/// MachineSchedOpt allows command line selection of the scheduler. -static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, - RegisterPassParser<MachineSchedRegistry> > -MachineSchedOpt("misched", - cl::init(&createDefaultMachineSched), cl::Hidden, - cl::desc("Machine instruction scheduler to use")); +namespace { +class ScheduleDAGMI; + +/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected +/// scheduling algorithm. +/// +/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it +/// in ScheduleDAGInstrs.h +class MachineSchedStrategy { +public: + virtual ~MachineSchedStrategy() {} + + /// Initialize the strategy after building the DAG for a new region. + virtual void initialize(ScheduleDAGMI *DAG) = 0; + + /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to + /// schedule the node at the top of the unscheduled region. Otherwise it will + /// be scheduled at the bottom. + virtual SUnit *pickNode(bool &IsTopNode) = 0; + + /// When all predecessor dependencies have been resolved, free this node for + /// top-down scheduling. + virtual void releaseTopNode(SUnit *SU) = 0; + /// When all successor dependencies have been resolved, free this node for + /// bottom-up scheduling. + virtual void releaseBottomNode(SUnit *SU) = 0; +}; +} // namespace //===----------------------------------------------------------------------===// -// Machine Instruction Scheduling Common Implementation +// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals +// preservation. //===----------------------------------------------------------------------===// namespace { -/// ScheduleTopDownLive is an implementation of ScheduleDAGInstrs that schedules +/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules /// machine instructions while updating LiveIntervals. -class ScheduleTopDownLive : public ScheduleDAGInstrs { -protected: - MachineScheduler *Pass; +class ScheduleDAGMI : public ScheduleDAGInstrs { + AliasAnalysis *AA; + MachineSchedStrategy *SchedImpl; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; public: - ScheduleTopDownLive(MachineScheduler *P): - ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS), - Pass(P) {} + ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), + NumInstrsScheduled(0) {} - /// ScheduleDAGInstrs callback. - void Schedule(); + ~ScheduleDAGMI() { + delete SchedImpl; + } - /// Interface implemented by the selected top-down liveinterval scheduler. - /// - /// Pick the next node to schedule, or return NULL. - virtual SUnit *pickNode() = 0; + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - /// When all preceeding dependencies have been resolved, free this node for - /// scheduling. - virtual void releaseNode(SUnit *SU) = 0; + /// Implement ScheduleDAGInstrs interface. + void schedule(); protected: + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + void releaseSucc(SUnit *SU, SDep *SuccEdge); void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); }; } // namespace /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. -void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) { +void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); #ifndef NDEBUG @@ -185,164 +331,199 @@ void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) { #endif --SuccSU->NumPredsLeft; if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) - releaseNode(SuccSU); + SchedImpl->releaseTopNode(SuccSU); } /// releaseSuccessors - Call releaseSucc on each of SU's successors. -void ScheduleTopDownLive::releaseSuccessors(SUnit *SU) { +void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { releaseSucc(SU, &*I); } } -/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's -/// time to do some work. -void ScheduleTopDownLive::Schedule() { - BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>()); +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void ScheduleDAGMI::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void ScheduleDAGMI::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Fix RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + RegionBegin = llvm::next(RegionBegin); + BB->splice(InsertPos, BB, MI); + LIS->handleMove(MI); + // Fix RegionBegin if another instruction moves above the first instruction. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool ScheduleDAGMI::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. +void ScheduleDAGMI::schedule() { + buildSchedGraph(AA); DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - // Release any successors of the special Entry node. It is currently unused, - // but we keep up appearances. + if (ViewMISchedDAGs) viewGraph(); + + SchedImpl->initialize(this); + + // Release edges from the special Entry node or to the special Exit node. releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); // Release all DAG roots for scheduling. for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - // A SUnit is ready to schedule if it has no predecessors. + // A SUnit is ready to top schedule if it has no predecessors. if (I->Preds.empty()) - releaseNode(&(*I)); + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + SchedImpl->releaseBottomNode(&(*I)); } - InsertPos = Begin; - while (SUnit *SU = pickNode()) { - DEBUG(dbgs() << "*** Scheduling Instruction:\n"; SU->dump(this)); + CurrentTop = RegionBegin; + CurrentBottom = RegionEnd; + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction:\n"; SU->dump(this)); + if (!checkSchedLimit()) + break; // Move the instruction to its new location in the instruction stream. MachineInstr *MI = SU->getInstr(); - if (&*InsertPos == MI) - ++InsertPos; + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + ++CurrentTop; + else + moveInstruction(MI, CurrentTop); + // Release dependent instructions for scheduling. + releaseSuccessors(SU); + } else { - BB->splice(InsertPos, BB, MI); - Pass->LIS->handleMove(MI); - if (Begin == InsertPos) - Begin = MI; + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + if (&*llvm::prior(CurrentBottom) == MI) + --CurrentBottom; + else { + if (&*CurrentTop == MI) + CurrentTop = llvm::next(CurrentTop); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Release dependent instructions for scheduling. + releasePredecessors(SU); } - - // Release dependent instructions for scheduling. - releaseSuccessors(SU); + SU->isScheduled = true; } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); } -bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { - // Initialize the context of the pass. - MF = &mf; - MLI = &getAnalysis<MachineLoopInfo>(); - MDT = &getAnalysis<MachineDominatorTree>(); - LIS = &getAnalysis<LiveIntervals>(); - TII = MF->getTarget().getInstrInfo(); +//===----------------------------------------------------------------------===// +// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +//===----------------------------------------------------------------------===// - // Select the scheduler, or set the default. - MachineSchedRegistry::ScheduleDAGCtor Ctor = - MachineSchedRegistry::getDefault(); - if (!Ctor) { - Ctor = MachineSchedOpt; - MachineSchedRegistry::setDefault(Ctor); - } - // Instantiate the selected scheduler. - OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); +namespace { +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { + ScheduleDAGMI *DAG; - // Visit all machine basic blocks. - for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); - MBB != MBBEnd; ++MBB) { + unsigned NumTopReady; + unsigned NumBottomReady; - // Break the block into scheduling regions [I, RegionEnd), and schedule each - // region as soon as it is discovered. - unsigned RemainingCount = MBB->size(); - for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin();) { - // The next region starts above the previous region. Look backward in the - // instruction stream until we find the nearest boundary. - MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingCount) { - if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) - break; - } - if (I == RegionEnd) { - // Skip empty scheduling regions. - RegionEnd = llvm::prior(RegionEnd); - --RemainingCount; - continue; - } - // Skip regions with one instruction. - if (I == llvm::prior(RegionEnd)) { - RegionEnd = llvm::prior(RegionEnd); - continue; - } - DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() - << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; - if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; - else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingCount << "\n"); +public: + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; - // Inform ScheduleDAGInstrs of the region being scheduled. It calls back - // to our Schedule() method. - Scheduler->Run(MBB, I, RegionEnd, MBB->size()); - RegionEnd = Scheduler->Begin; - } - assert(RemainingCount == 0 && "Instruction count mismatch!"); + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); } - return true; -} -void MachineScheduler::print(raw_ostream &O, const Module* m) const { - // unimplemented -} + virtual SUnit *pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) + return NULL; -//===----------------------------------------------------------------------===// -// Placeholder for extending the machine instruction scheduler. -//===----------------------------------------------------------------------===// - -namespace { -class DefaultMachineScheduler : public ScheduleDAGInstrs { - MachineScheduler *Pass; -public: - DefaultMachineScheduler(MachineScheduler *P): - ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS), - Pass(P) {} + // As an initial placeholder heuristic, schedule in the direction that has + // the fewest choices. + SUnit *SU; + if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) { + SU = DAG->getSUnit(DAG->top()); + IsTopNode = true; + } + else { + SU = DAG->getSUnit(llvm::prior(DAG->bottom())); + IsTopNode = false; + } + if (SU->isTopReady()) { + assert(NumTopReady > 0 && "bad ready count"); + --NumTopReady; + } + if (SU->isBottomReady()) { + assert(NumBottomReady > 0 && "bad ready count"); + --NumBottomReady; + } + return SU; + } - /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's - /// time to do some work. - void Schedule(); + virtual void releaseTopNode(SUnit *SU) { + ++NumTopReady; + } + virtual void releaseBottomNode(SUnit *SU) { + ++NumBottomReady; + } }; } // namespace -static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P) { - return new DefaultMachineScheduler(P); +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new ConvergingScheduler()); } static MachineSchedRegistry -SchedDefaultRegistry("default", "Activate the scheduler pass, " - "but don't reorder instructions", - createDefaultMachineSched); - - -/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's -/// time to do some work. -void DefaultMachineScheduler::Schedule() { - BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>()); - - DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); - - // TODO: Put interesting things here. - // - // When this is fully implemented, it will become a subclass of - // ScheduleTopDownLive. So this driver will disappear. -} +ConvergingSchedRegistry("converge", "Standard converging scheduler.", + createConvergingSched); //===----------------------------------------------------------------------===// // Machine Instruction Shuffler for Correctness Testing @@ -350,43 +531,83 @@ void DefaultMachineScheduler::Schedule() { #ifndef NDEBUG namespace { -// Nodes with a higher number have higher priority. This way we attempt to -// schedule the latest instructions earliest. -// -// TODO: Relies on the property of the BuildSchedGraph that results in SUnits -// being ordered in sequence top-down. -struct ShuffleSUnitOrder { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template<bool IsReverse> +struct SUnitOrder { bool operator()(SUnit *A, SUnit *B) const { - return A->NodeNum < B->NodeNum; + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; } }; /// Reorder instructions as much as possible. -class InstructionShuffler : public ScheduleTopDownLive { - std::priority_queue<SUnit*, std::vector<SUnit*>, ShuffleSUnitOrder> Queue; +class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder<false>) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > + BottomQ; public: - InstructionShuffler(MachineScheduler *P): - ScheduleTopDownLive(P) {} + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} - /// ScheduleTopDownLive Interface + virtual void initialize(ScheduleDAGMI *) { + TopQ.clear(); + BottomQ.clear(); + } - virtual SUnit *pickNode() { - if (Queue.empty()) return NULL; - SUnit *SU = Queue.top(); - Queue.pop(); + /// Implement MachineSchedStrategy interface. + /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return NULL; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return NULL; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; return SU; } - virtual void releaseNode(SUnit *SU) { - Queue.push(SU); + virtual void releaseTopNode(SUnit *SU) { + TopQ.push(SU); + } + virtual void releaseBottomNode(SUnit *SU) { + BottomQ.push(SU); } }; } // namespace -static ScheduleDAGInstrs *createInstructionShuffler(MachineScheduler *P) { - return new InstructionShuffler(P); +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); } -static MachineSchedRegistry ShufflerRegistry("shuffle", - "Shuffle machine instructions", - createInstructionShuffler); +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); #endif // !NDEBUG diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 394a960..830a876 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -900,7 +900,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -937,7 +937,7 @@ void MachineVerifier::calcRegsPassed() { // similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -970,9 +970,10 @@ void MachineVerifier::calcRegsRequired() { // Check PHI instructions at the beginning of MBB. It is assumed that // calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet<const MachineBasicBlock*, 8> seen; for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) { - DenseSet<const MachineBasicBlock*> seen; + seen.clear(); for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); @@ -1013,8 +1014,17 @@ void MachineVerifier::visitMachineFunctionAfter() { } // Now check liveness info if available - if (LiveVars || LiveInts) - calcRegsRequired(); + calcRegsRequired(); + + if (MRI->isSSA() && !MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + if (LiveVars) verifyLiveVariables(); if (LiveInts) diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index ec1f2b4..6246c21 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -564,7 +564,8 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(RegisterCoalescerID); // PreRA instruction scheduling. - addPass(MachineSchedulerID); + if (addPass(MachineSchedulerID) != &NoPassID) + printAndVerify("After Machine Scheduling"); // Add the selected register allocation pass. PM.add(RegAllocPass); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index e59aa9d..24d3e5a 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,6 @@ #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" #include "RegisterClassInfo.h" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" @@ -127,6 +127,9 @@ namespace { /// LiveRegs - true if the register is live. BitVector LiveRegs; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, @@ -136,23 +139,34 @@ namespace { ~SchedulePostRATDList(); - /// StartBlock - Initialize register live-range state for scheduling in + /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void StartBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); /// Schedule - Schedule the instruction range using list scheduling. /// - void Schedule(); + void schedule(); + + void EmitSchedule(); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// void Observe(MachineInstr *MI, unsigned Count); - /// FinishBlock - Clean up register live-range state. + /// finishBlock - Clean up register live-range state. /// - void FinishBlock(); + void finishBlock(); /// FixupKills - Fix register kill flags that have been made /// invalid due to scheduling @@ -170,6 +184,8 @@ namespace { // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + void dumpSchedule() const; }; } @@ -202,6 +218,35 @@ SchedulePostRATDList::~SchedulePostRATDList() { delete AntiDepBreak; } +/// Initialize state associated with the next scheduling region. +void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); @@ -256,7 +301,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { #endif // Initialize register live-range state for scheduling in this block. - Scheduler.StartBlock(MBB); + Scheduler.startBlock(MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. @@ -268,7 +313,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.Run(MBB, I, Current, CurrentCount); + Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; @@ -282,11 +329,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert(Count == 0 && "Instruction count mismatch!"); assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); // Clean up register live-range state. - Scheduler.FinishBlock(); + Scheduler.finishBlock(); // Update register kills Scheduler.FixupKills(MBB); @@ -298,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// -void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Call the superclass. - ScheduleDAGInstrs::StartBlock(BB); + ScheduleDAGInstrs::startBlock(BB); // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); @@ -310,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. /// -void SchedulePostRATDList::Schedule() { +void SchedulePostRATDList::schedule() { // Build the scheduling graph. - BuildSchedGraph(AA); + buildSchedGraph(AA); if (AntiDepBreak != NULL) { unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, - InsertPosIndex, DbgValues); + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); if (Broken != 0) { // We made changes. Update the dependency graph. @@ -326,11 +375,8 @@ void SchedulePostRATDList::Schedule() { // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); NumFixedAnti += Broken; } @@ -350,17 +396,17 @@ void SchedulePostRATDList::Schedule() { /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { if (AntiDepBreak != NULL) - AntiDepBreak->Observe(MI, Count, InsertPosIndex); + AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// -void SchedulePostRATDList::FinishBlock() { +void SchedulePostRATDList::finishBlock() { if (AntiDepBreak != NULL) AntiDepBreak->FinishBlock(); // Call the superclass. - ScheduleDAGInstrs::FinishBlock(); + ScheduleDAGInstrs::finishBlock(); } /// StartBlockForKills - Initialize register live-range state for updating kills @@ -589,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { ReleaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue.ScheduledNode(SU); + AvailableQueue.scheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down @@ -703,6 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); -#endif + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG +} + +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; + + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->splice(RegionEnd, BB, FirstDbgValue); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + BB->splice(RegionEnd, BB, SU->getInstr()); + else + // Null SUnit* is a noop. + TII->insertNoop(*BB, RegionEnd); + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + RegionBegin = prior(RegionEnd); + } + + // Reinsert any remaining debug_values. + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c27a485..e09b7f8 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1139,7 +1139,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // Add the clobber lists for all the instructions we skipped earlier. for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const unsigned *Defs = (*I)->getImplicitDefs()) + if (const uint16_t *Defs = (*I)->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index ef0c508..310b933 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -47,7 +47,7 @@ namespace llvm { /// CrossClass - True when both regs are virtual, and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the oriignal + /// Flipped - True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 94b28b6..8fd6426 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -46,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} -/// getInstrDesc helper to handle SDNodes. -const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { - if (!Node || !Node->isMachineOpcode()) return NULL; - return &TII->get(Node->getMachineOpcode()); -} - -/// dump - dump the schedule. -void ScheduleDAG::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(this); - else - dbgs() << "**** NOOP ****\n"; - } -} - - -/// Run - perform scheduling. -/// -void ScheduleDAG::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { - BB = bb; - InsertPos = insertPos; - +/// Clear the DAG state (e.g. between scheduling regions). +void ScheduleDAG::clearDAG() { SUnits.clear(); - Sequence.clear(); EntrySU = SUnit(); ExitSU = SUnit(); +} - Schedule(); - - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); +/// getInstrDesc helper to handle SDNodes. +const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); } /// addPred - This adds the specified edge as a pred of the current node if @@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } #ifndef NDEBUG -/// VerifySchedule - Verify that all SUnits were scheduled and that -/// their state is consistent. +/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that +/// their state is consistent. Return the number of scheduled nodes. /// -void ScheduleDAG::VerifySchedule(bool isBottomUp) { +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { bool AnyNotSched = false; unsigned DeadNodes = 0; - unsigned Noops = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { if (!SUnits[i].isScheduled) { if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { @@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { } } } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + return SUnits.size() - DeadNodes; } #endif diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp deleted file mode 100644 index f8b1bc7..0000000 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements the Emit routines for the ScheduleDAG class, which creates -// MachineInstrs according to the computed schedule. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-RA-sched" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -void ScheduleDAG::EmitNoop() { - TII->insertNoop(*BB, InsertPos); -} - -void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, - DenseMap<SUnit*, unsigned> &VRBaseMap) { - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->CopyDstRC) { - // Copy to physical register. - DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); - assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); - // Find the destination physical register. - unsigned Reg = 0; - for (SUnit::const_succ_iterator II = SU->Succs.begin(), - EE = SU->Succs.end(); II != EE; ++II) { - if (II->isCtrl()) continue; // ignore chain preds - if (II->getReg()) { - Reg = II->getReg(); - break; - } - } - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(VRI->second); - } else { - // Copy from physical register. - assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); - bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - (void)isNew; // Silence compiler warning. - assert(isNew && "Node emitted out of order - early"); - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(I->getReg()); - } - break; - } -} diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index c0ccdb3..6be1ab7 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" -#include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -38,30 +38,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, bool IsPostRAFlag, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag), - LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) { + InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), + IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), + FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); } -/// Run - perform scheduling. -/// -void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount) { - BB = bb; - Begin = begin; - InsertPosIndex = endcount; - - // Check to see if the scheduler cares about latencies. - UnitLatencies = ForceUnitLatencies(); - - ScheduleDAG::Run(bb, end); -} - /// getUnderlyingObjectFromInt - This is the function that does the work of /// looking through basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObjectFromInt(const Value *V) { @@ -141,28 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) if (BB == ML->getLoopLatch()) LoopRegs.VisitLoop(ML); } +void ScheduleDAGInstrs::finishBlock() { + // Nothing to do. +} + /// Initialize the map with the number of registers. -void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) { +void Reg2SUnitsMap::setRegLimit(unsigned Limit) { PhysRegSet.setUniverse(Limit); SUnits.resize(Limit); } /// Clear the map without deallocating storage. -void ScheduleDAGInstrs::Reg2SUnitsMap::clear() { +void Reg2SUnitsMap::clear() { for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { SUnits[*I].clear(); } PhysRegSet.clear(); } -/// AddSchedBarrierDeps - Add dependencies from instructions in the current +/// Initialize the DAG and common scheduler state for the current scheduling +/// region. This does not actually create the DAG, only clears it. The +/// scheduling driver may call BuildSchedGraph multiple times per scheduling +/// region. +void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + BB = bb; + RegionBegin = begin; + RegionEnd = end; + EndIndex = endcount; + MISUnitMap.clear(); + + // Check to see if the scheduler cares about latencies. + UnitLatencies = forceUnitLatencies(); + + ScheduleDAG::clearDAG(); +} + +/// Close the current scheduling region. Don't clear any state in case the +/// driver wants to refer to the previous scheduling region. +void ScheduleDAGInstrs::exitRegion() { + // Nothing to do. +} + +/// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier by adding /// the exit SU to the register defs and use list. This is because we want to /// make sure instructions which define registers that are either used by @@ -170,8 +185,8 @@ void ScheduleDAGInstrs::Reg2SUnitsMap::clear() { /// especially important when the definition latency of the return value(s) /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. -void ScheduleDAGInstrs::AddSchedBarrierDeps() { - MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; +void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && (ExitMI->isCall() || ExitMI->isBarrier()); @@ -186,19 +201,21 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { if (TRI->isPhysicalRegister(Reg)) Uses[Reg].push_back(&ExitSU); - else + else { assert(!IsPostRA && "Virtual register encountered after regalloc."); + addVRegUseDeps(&ExitSU, i); + } } } else { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - SmallSet<unsigned, 8> Seen; + assert(Uses.empty() && "Uses in set before adding deps?"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - if (Seen.insert(Reg)) + if (!Uses.contains(Reg)) Uses[Reg].push_back(&ExitSU); } } @@ -246,7 +263,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // perform its own adjustments. const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); @@ -436,7 +453,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); } @@ -455,20 +472,23 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// /// Map each real instruction to its SUnit. /// -/// After initSUnits, the SUnits vector is cannot be resized and the scheduler -/// may hang onto SUnit pointers. We may relax this in the future by using SUnit -/// IDs instead of pointers. +/// After initSUnits, the SUnits vector cannot be resized and the scheduler may +/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs +/// instead of pointers. +/// +/// MachineScheduler relies on initSUnits numbering the nodes by their order in +/// the original instruction list. void ScheduleDAGInstrs::initSUnits() { // We'll be allocating one SUnit for each real instruction in the region, // which is contained within a basic block. SUnits.reserve(BB->size()); - for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) { + for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { MachineInstr *MI = I; if (MI->isDebugValue()) continue; - SUnit *SU = NewSUnit(MI); + SUnit *SU = newSUnit(MI); MISUnitMap[MI] = SU; SU->isCall = MI->isCall(); @@ -478,11 +498,11 @@ void ScheduleDAGInstrs::initSUnits() { if (UnitLatencies) SU->Latency = 1; else - ComputeLatency(SU); + computeLatency(SU); } } -void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { // Create an SUnit for each real instruction. initSUnits(); @@ -517,11 +537,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Model data dependencies between instructions being scheduled and the // ExitSU. - AddSchedBarrierDeps(); + addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. MachineInstr *PrevMI = NULL; - for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); if (MI && PrevMI) { @@ -712,14 +732,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { Uses.clear(); VRegDefs.clear(); PendingLoads.clear(); - MISUnitMap.clear(); -} - -void ScheduleDAGInstrs::FinishBlock() { - // Nothing to do. } -void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { +void ScheduleDAGInstrs::computeLatency(SUnit *SU) { // Compute the latency for the node. if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; @@ -733,7 +748,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; @@ -808,37 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { return oss.str(); } -// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { - Begin = InsertPos; - - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) - BB->splice(InsertPos, BB, FirstDbgValue); - - // Then re-insert them according to the given schedule. - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - BB->splice(InsertPos, BB, SU->getInstr()); - else - // Null SUnit* is a noop. - EmitNoop(); - - // Update the Begin iterator, as the first instruction in the block - // may have been scheduled later. - if (i == 0) - Begin = prior(InsertPos); - } - - // Reinsert any remaining debug_values. - for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineBasicBlock::iterator OrigPrivMI = P.second; - BB->splice(++OrigPrivMI, BB, DbgValue); - } - DbgValues.clear(); - FirstDbgValue = NULL; - return BB; +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. +std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h deleted file mode 100644 index c7ffed9..0000000 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ /dev/null @@ -1,306 +0,0 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ScheduleDAGInstrs class, which implements -// scheduling for a MachineInstr-based dependency graph. -// -//===----------------------------------------------------------------------===// - -#ifndef SCHEDULEDAGINSTRS_H -#define SCHEDULEDAGINSTRS_H - -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SparseSet.h" -#include <map> - -namespace llvm { - class MachineLoopInfo; - class MachineDominatorTree; - class LiveIntervals; - - /// LoopDependencies - This class analyzes loop-oriented register - /// dependencies, which are used to guide scheduling decisions. - /// For example, loop induction variable increments should be - /// scheduled as soon as possible after the variable's last use. - /// - class LLVM_LIBRARY_VISIBILITY LoopDependencies { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - - public: - typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > - LoopDeps; - LoopDeps Deps; - - LoopDependencies(const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) : - MLI(mli), MDT(mdt) {} - - /// VisitLoop - Clear out any previous state and analyze the given loop. - /// - void VisitLoop(const MachineLoop *Loop) { - assert(Deps.empty() && "stale loop dependencies"); - - MachineBasicBlock *Header = Loop->getHeader(); - SmallSet<unsigned, 8> LoopLiveIns; - for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), - LE = Header->livein_end(); LI != LE; ++LI) - LoopLiveIns.insert(*LI); - - const MachineDomTreeNode *Node = MDT.getNode(Header); - const MachineBasicBlock *MBB = Node->getBlock(); - assert(Loop->contains(MBB) && - "Loop does not contain header!"); - VisitRegion(Node, MBB, Loop, LoopLiveIns); - } - - private: - void VisitRegion(const MachineDomTreeNode *Node, - const MachineBasicBlock *MBB, - const MachineLoop *Loop, - const SmallSet<unsigned, 8> &LoopLiveIns) { - unsigned Count = 0; - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (LoopLiveIns.count(MOReg)) - Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); - } - ++Count; // Not every iteration due to dbg_value above. - } - - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - for (std::vector<MachineDomTreeNode*>::const_iterator I = - Children.begin(), E = Children.end(); I != E; ++I) { - const MachineDomTreeNode *ChildNode = *I; - MachineBasicBlock *ChildBlock = ChildNode->getBlock(); - if (Loop->contains(ChildBlock)) - VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); - } - } - }; - - /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of - /// MachineInstrs. - class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - const MachineFrameInfo *MFI; - const InstrItineraryData *InstrItins; - - /// isPostRA flag indicates vregs cannot be present. - bool IsPostRA; - - /// Live Intervals provides reaching defs in preRA scheduling. - LiveIntervals *LIS; - - DenseMap<MachineInstr*, SUnit*> MISUnitMap; - - /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using - /// the def-side latency only. - bool UnitLatencies; - - /// Combine a SparseSet with a 1x1 vector to track physical registers. - /// The SparseSet allows iterating over the (few) live registers for quickly - /// comparing against a regmask or clearing the set. - /// - /// Storage for the map is allocated once for the pass. The map can be - /// cleared between scheduling regions without freeing unused entries. - class Reg2SUnitsMap { - SparseSet<unsigned> PhysRegSet; - std::vector<std::vector<SUnit*> > SUnits; - public: - typedef SparseSet<unsigned>::const_iterator const_iterator; - - // Allow iteration over register numbers (keys) in the map. If needed, we - // can provide an iterator over SUnits (values) as well. - const_iterator reg_begin() const { return PhysRegSet.begin(); } - const_iterator reg_end() const { return PhysRegSet.end(); } - - /// Initialize the map with the number of registers. - /// If the map is already large enough, no allocation occurs. - /// For simplicity we expect the map to be empty(). - void setRegLimit(unsigned Limit); - - /// Returns true if the map is empty. - bool empty() const { return PhysRegSet.empty(); } - - /// Clear the map without deallocating storage. - void clear(); - - bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); } - - /// If this register is mapped, return its existing SUnits vector. - /// Otherwise map the register and return an empty SUnits vector. - std::vector<SUnit *> &operator[](unsigned Reg) { - bool New = PhysRegSet.insert(Reg).second; - assert((!New || SUnits[Reg].empty()) && "stale SUnits vector"); - (void)New; - return SUnits[Reg]; - } - - /// Erase an existing element without freeing memory. - void erase(unsigned Reg) { - PhysRegSet.erase(Reg); - SUnits[Reg].clear(); - } - }; - /// Defs, Uses - Remember where defs and uses of each register are as we - /// iterate upward through the instructions. This is allocated here instead - /// of inside BuildSchedGraph to avoid the need for it to be initialized and - /// destructed for each block. - Reg2SUnitsMap Defs; - Reg2SUnitsMap Uses; - - /// An individual mapping from virtual register number to SUnit. - struct VReg2SUnit { - unsigned VirtReg; - SUnit *SU; - - VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} - - unsigned getSparseSetKey() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); - } - }; - /// Use SparseSet as a SparseMap by relying on the fact that it never - /// compares ValueT's, only unsigned keys. This allows the set to be cleared - /// between scheduling regions in constant time as long as ValueT does not - /// require a destructor. - typedef SparseSet<VReg2SUnit> VReg2SUnitMap; - /// Track the last instructon in this region defining each virtual register. - VReg2SUnitMap VRegDefs; - - /// PendingLoads - Remember where unknown loads are after the most recent - /// unknown store, as we iterate. As with Defs and Uses, this is here - /// to minimize construction/destruction. - std::vector<SUnit *> PendingLoads; - - /// LoopRegs - Track which registers are used for loop-carried dependencies. - /// - LoopDependencies LoopRegs; - - protected: - - /// DbgValues - Remember instruction that preceeds DBG_VALUE. - typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > - DbgValueVector; - DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; - - public: - MachineBasicBlock::iterator Begin; // The beginning of the range to - // be scheduled. The range extends - // to InsertPos. - unsigned InsertPosIndex; // The index in BB of InsertPos. - - explicit ScheduleDAGInstrs(MachineFunction &mf, - const MachineLoopInfo &mli, - const MachineDominatorTree &mdt, - bool IsPostRAFlag, - LiveIntervals *LIS = 0); - - virtual ~ScheduleDAGInstrs() {} - - /// NewSUnit - Creates a new SUnit and return a ptr to it. - /// - SUnit *NewSUnit(MachineInstr *MI) { -#ifndef NDEBUG - const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0]; -#endif - SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && - "SUnits std::vector reallocated on the fly!"); - SUnits.back().OrigNode = &SUnits.back(); - return &SUnits.back(); - } - - - /// Run - perform scheduling. - /// - void Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endindex); - - /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are - /// input. - virtual void BuildSchedGraph(AliasAnalysis *AA); - - /// AddSchedBarrierDeps - Add dependencies from instructions in the current - /// list of instructions being scheduled to scheduling barrier. We want to - /// make sure instructions which define registers that are either used by - /// the terminator or are live-out are properly scheduled. This is - /// especially important when the definition latency of the return value(s) - /// are too high to be hidden by the branch or when the liveout registers - /// used by instructions in the fallthrough block. - void AddSchedBarrierDeps(); - - /// ComputeLatency - Compute node latency. - /// - virtual void ComputeLatency(SUnit *SU); - - /// ComputeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const; - - virtual MachineBasicBlock *EmitSchedule(); - - /// StartBlock - Prepare to perform scheduling in the given block. - /// - virtual void StartBlock(MachineBasicBlock *BB); - - /// Schedule - Order nodes according to selected style, filling - /// in the Sequence member. - /// - virtual void Schedule() = 0; - - /// FinishBlock - Clean up after scheduling in the given block. - /// - virtual void FinishBlock(); - - virtual void dumpNode(const SUnit *SU) const; - - virtual std::string getGraphNodeLabel(const SUnit *SU) const; - - protected: - SUnit *getSUnit(MachineInstr *MI) const { - DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI); - if (I == MISUnitMap.end()) - return 0; - return I->second; - } - - void initSUnits(); - void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO); - void addPhysRegDeps(SUnit *SU, unsigned OperIdx); - void addVRegDefDeps(SUnit *SU, unsigned OperIdx); - void addVRegUseDeps(SUnit *SU, unsigned OperIdx); - - VReg2SUnitMap::iterator findVRegDef(unsigned VirtReg) { - return VRegDefs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - }; -} - -#endif diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 4251583..38feee9 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -41,12 +41,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. static std::string getEdgeAttributes(const SUnit *Node, @@ -58,7 +58,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, @@ -81,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG /// rendered using 'dot'. /// -void ScheduleDAG::viewGraph() { -// This code is only for debugging! +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! #ifndef NDEBUG - if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName() + - ":" + BB->getBasicBlock()->getName()); - else - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName()); + ViewGraph(this, Name, false, Title); #else errs() << "ScheduleDAG::viewGraph is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 9a79217..a6bdc3b 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMSelectionDAG ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp + SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp ScheduleDAGVLIW.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1b148ad..7c4db97 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -80,7 +80,7 @@ namespace { // visit, we pop off the order stack until we find an item that is // also in the contents set. All operations are O(log N). SmallPtrSet<SDNode*, 64> WorkListContents; - std::vector<SDNode*> WorkListOrder; + SmallVector<SDNode*, 64> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -381,6 +381,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. @@ -406,12 +407,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, // FIXME: determine better conditions for this xform. if (!Options->UnsafeFPMath) return 0; + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; + // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, - Depth + 1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. @@ -425,17 +431,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, - Depth + 1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1); } } @@ -464,6 +470,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, @@ -493,6 +500,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, @@ -997,8 +1005,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // worklist *should* contain, and check the node we want to visit is should // actually be visited. do { - N = WorkListOrder.back(); - WorkListOrder.pop_back(); + N = WorkListOrder.pop_back_val(); } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once @@ -5507,11 +5514,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); @@ -5549,16 +5558,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub 0, B) -> -B if (DAG.getTarget().Options.UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // If 'unsafe math' is enabled, fold + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N10, DAG, LegalOperations); + } + } + return SDValue(); } @@ -5568,6 +5594,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5598,9 +5625,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. @@ -5628,6 +5655,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5641,9 +5669,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. @@ -5897,7 +5925,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options)) + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading @@ -6129,8 +6158,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that /// uses N as its base pointer and that N may be folded in the load / store -/// addressing mode. FIXME: This currently only looks for folding of -/// [reg +/- imm] addressing modes. +/// addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -6150,15 +6178,19 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, if (N->getOpcode() == ISD::ADD) { ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) + // [reg +/- imm] AM.BaseOffs = Offset->getSExtValue(); else - return false; + // [reg +/- reg] + AM.Scale = 1; } else if (N->getOpcode() == ISD::SUB) { ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) + // [reg +/- imm] AM.BaseOffs = -Offset->getSExtValue(); else - return false; + // [reg +/- reg] + AM.Scale = 1; } else return false; @@ -7187,6 +7219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) @@ -7287,17 +7324,36 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Note that this replacement assumes that the extractvalue is the only // use of the load; that's okay because we don't want to perform this // transformation in other cases anyway. - SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + } WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load.getValue(0), Load.getValue(1) }; + SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); // Since we're explcitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. AddToWorkList(N); return SDValue(N, 0); @@ -7367,6 +7423,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. + // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && ValidTypes) { bool isLE = TLI.isLittleEndian(); @@ -7407,6 +7465,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VecVT, &Ops[0], Ops.size()); + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); // Bitcast to the desired type. return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); } @@ -7414,6 +7474,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + SDValue VecIn1, VecIn2; for (unsigned i = 0; i != NumInScalars; ++i) { // Ignore undef inputs. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index fd8ce78..9f4a44a 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -136,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return 0; } - // Look up the value to see if we already have a register for it. We - // cache values defined by Instructions across blocks, and other values - // only locally. This is because Instructions already have the SSA - // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) - return I->second; - - unsigned Reg = LocalValueMap[V]; + // Look up the value to see if we already have a register for it. + unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; @@ -199,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); @@ -577,12 +570,16 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || - !FuncInfo.MF->getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } const Value *Address = DI->getAddress(); - if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } unsigned Reg = 0; unsigned Offset = 0; @@ -590,16 +587,25 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) - Reg = getRegForValue(Address); + Reg = lookUpRegForValue(Address); + + if (!Reg && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 31df458..1b84b13 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3032,6 +3032,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Results[0].getValue(1)); break; } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && @@ -3590,10 +3600,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FDIV: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0))); break; diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1a27f3f..ff0136e 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -470,7 +470,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { /// Main resource tracking point. -void ResourcePriorityQueue::ScheduledNode(SUnit *SU) { +void ResourcePriorityQueue::scheduledNode(SUnit *SU) { // Use NULL entry as an event marker to reset // the DFA state. if (!SU) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34ee1f3..24da432 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -43,7 +43,7 @@ namespace { SmallVector<SUnit *, 16> Queue; bool empty() const { return Queue.empty(); } - + void push(SUnit *U) { Queue.push_back(U); } @@ -101,8 +101,8 @@ private: bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); - /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool ForceUnitLatencies() const { return true; } + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. @@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. if (!LiveRegDefs[I->getReg()]) { @@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), SDValue(LoadNode, 1)); - SUnit *NewSU = NewSUnit(N); + SUnit *NewSU = newSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { @@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { LoadSU = &SUnits[LoadNode->getNodeId()]; isNewLoad = false; } else { - LoadSU = NewSUnit(LoadNode); + LoadSU = newSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); } @@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } if (isNewLoad) { AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); } @@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { - SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1017d36..f44adfc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -232,7 +232,7 @@ private: /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); - SUnit *NewNode = NewSUnit(N); + SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) Topo.InitDAGTopologicalSorting(); @@ -250,9 +250,9 @@ private: return NewNode; } - /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool ForceUnitLatencies() const { + bool forceUnitLatencies() const { return !NeedLatency; } }; @@ -327,6 +327,12 @@ void ScheduleDAGRRList::Schedule() { ListScheduleBottomUp(); AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -348,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; - if (!ForceUnitLatencies()) { + if (!forceUnitLatencies()) { // Updating predecessor's height. This is now the cycle when the // predecessor can be scheduled without causing a pipeline stall. PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); @@ -695,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { Sequence.push_back(SU); - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then // advance CurCycle before ReleasePredecessors to avoid useless pushes to @@ -842,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { else { AvailableQueue->push(SU); } - AvailableQueue->UnscheduledNode(SU); + AvailableQueue->unscheduledNode(SU); } /// After backtracking, the hazard checker needs to be restored to a state @@ -963,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { LoadNode->setNodeId(LoadSU->NodeNum); InitNumRegDefsLeft(LoadSU); - ComputeLatency(LoadSU); + computeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); @@ -981,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); - ComputeLatency(NewSU); + computeLatency(NewSU); // Record all the edges to and from the old SU, by category. SmallVector<SDep, 4> ChainPreds; @@ -1160,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1286,7 +1292,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1475,7 +1481,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } @@ -1681,9 +1687,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void ScheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU); - void UnscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU); protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1984,7 +1990,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { return PDiff; } -void RegReductionPQBase::ScheduledNode(SUnit *SU) { +void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2053,7 +2059,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { dumpRegPressure(); } -void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2661,7 +2667,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const unsigned *ImpDefs + const uint16_t *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2680,7 +2686,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2699,13 +2705,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const unsigned *SUImpDefs = + const uint16_t *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 71f07d6..69dd813 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,8 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), + : ScheduleDAG(mf), BB(0), DAG(0), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// -void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; DAG = dag; - ScheduleDAG::Run(bb, insertPos); + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); } /// NewSUnit - Creates a new SUnit and return a ptr to it. /// -SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG const SUnit *Addr = 0; if (!SUnits.empty()) @@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { } SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { - SUnit *SU = NewSUnit(Old->getNode()); + SUnit *SU = newSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; @@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - SUnit *NodeSUnit = NewSUnit(NI); + SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue @@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + computeLatency(NodeSUnit); } // Find all call operands. @@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); + bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } @@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } } -void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { SDNode *N = SU->getNode(); // TokenFactor operands are considered zero latency, and some schedulers @@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) { + if (forceUnitLatencies()) { SU->Latency = 1; return; } @@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency += TII->getInstrLatency(InstrItins, N); } -void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const{ // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) + if (forceUnitLatencies()) return; if (dep.getKind() != SDep::Data) @@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + namespace { struct OrderSorter { bool operator()(const std::pair<unsigned, MachineInstr*> &A, @@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} -/// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. - EmitNoop(); + TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } @@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. - EmitPhysRegCopy(SU, CopyVRBaseMap); + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } @@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { - MachineBasicBlock *InsertBB = Emitter.getBlock(); - MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); - if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); - if (DbgMI) - InsertBB->insert(Pos, DbgMI); - } + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); ++DI; } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } - BB = Emitter.getBlock(); InsertPos = Emitter.getInsertPos(); - return BB; + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 17b4901..75940ec 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -35,17 +35,20 @@ namespace llvm { /// class ScheduleDAGSDNodes : public ScheduleDAG { public: + MachineBasicBlock *BB; SelectionDAG *DAG; // DAG of the current basic block const InstrItineraryData *InstrItins; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + explicit ScheduleDAGSDNodes(MachineFunction &mf); virtual ~ScheduleDAGSDNodes() {} /// Run - perform scheduling. /// - void Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos); + void Run(SelectionDAG *dag, MachineBasicBlock *bb); /// isPassiveNode - Return true if the node is a non-scheduled leaf. /// @@ -68,7 +71,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and return a ptr to it. /// - SUnit *NewSUnit(SDNode *N); + SUnit *newSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. @@ -79,7 +82,7 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AliasAnalysis *AA); /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is /// CopyToReg and its only active data operands are CopyFromReg within a @@ -91,30 +94,41 @@ namespace llvm { /// void InitNumRegDefsLeft(SUnit *SU); - /// ComputeLatency - Compute node latency. + /// computeLatency - Compute node latency. /// - virtual void ComputeLatency(SUnit *SU); + virtual void computeLatency(SUnit *SU); - /// ComputeOperandLatency - Override dependence edge latency using + /// computeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { } - virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; - virtual MachineBasicBlock *EmitSchedule(); - /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual void dumpNode(const SUnit *SU) const; + void dumpSchedule() const; + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + virtual std::string getDAGName() const; + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; /// RegDefIter - In place iteration over the values defined by an @@ -160,6 +174,9 @@ namespace llvm { /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos); }; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 7d12509..c851291 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -158,7 +158,7 @@ void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { releaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); } /// listScheduleTopDown - The main loop of list scheduling for top-down @@ -202,7 +202,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // don't advance the hazard recognizer. if (AvailableQueue->empty()) { // Reset DFA state. - AvailableQueue->ScheduledNode(0); + AvailableQueue->scheduledNode(0); ++CurCycle; continue; } @@ -261,7 +261,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); + VerifyScheduledSequence(/*isBottomUp=*/false); #endif } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 796abf4..e3a7305 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -124,20 +124,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { if (i == e) return false; // Do not accept build_vectors that aren't all constants or which have non-~0 - // elements. + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (isa<ConstantSDNode>(NotZero)) { - if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < + EltSize) return false; } else if (isa<ConstantFPSDNode>(NotZero)) { - if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). - bitcastToAPInt().isAllOnesValue()) + if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() + .bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; // Okay, we have at least one ~0 value, check to see if the rest match or are - // undefs. + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. for (++i; i != e; ++i) if (N->getOperand(i) != NotZero && N->getOperand(i).getOpcode() != ISD::UNDEF) @@ -5904,571 +5913,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } -std::string SDNode::getOperationName(const SelectionDAG *G) const { - switch (getOpcode()) { - default: - if (getOpcode() < ISD::BUILTIN_OP_END) - return "<<Unknown DAG Node>>"; - if (isMachineOpcode()) { - if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) - if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->getName(getMachineOpcode()); - return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; - } - if (G) { - const TargetLowering &TLI = G->getTargetLoweringInfo(); - const char *Name = TLI.getTargetNodeName(getOpcode()); - if (Name) return Name; - return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; - } - return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; - -#ifndef NDEBUG - case ISD::DELETED_NODE: - return "<<Deleted Node!>>"; -#endif - case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; - case ISD::ATOMIC_FENCE: return "AtomicFence"; - case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; - case ISD::ATOMIC_SWAP: return "AtomicSwap"; - case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; - case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; - case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; - case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; - case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; - case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; - case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; - case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; - case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; - case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; - case ISD::ATOMIC_LOAD: return "AtomicLoad"; - case ISD::ATOMIC_STORE: return "AtomicStore"; - case ISD::PCMARKER: return "PCMarker"; - case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; - case ISD::SRCVALUE: return "SrcValue"; - case ISD::MDNODE_SDNODE: return "MDNode"; - case ISD::EntryToken: return "EntryToken"; - case ISD::TokenFactor: return "TokenFactor"; - case ISD::AssertSext: return "AssertSext"; - case ISD::AssertZext: return "AssertZext"; - - case ISD::BasicBlock: return "BasicBlock"; - case ISD::VALUETYPE: return "ValueType"; - case ISD::Register: return "Register"; - case ISD::RegisterMask: return "RegisterMask"; - case ISD::Constant: return "Constant"; - case ISD::ConstantFP: return "ConstantFP"; - case ISD::GlobalAddress: return "GlobalAddress"; - case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; - case ISD::FrameIndex: return "FrameIndex"; - case ISD::JumpTable: return "JumpTable"; - case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; - case ISD::RETURNADDR: return "RETURNADDR"; - case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; - case ISD::EH_RETURN: return "EH_RETURN"; - case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; - case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::ConstantPool: return "ConstantPool"; - case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::BlockAddress: return "BlockAddress"; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; - unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) - return TII->getName(IID); - llvm_unreachable("Invalid intrinsic ID"); - } - - case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; - case ISD::TargetConstantFP:return "TargetConstantFP"; - case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; - case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; - case ISD::TargetFrameIndex: return "TargetFrameIndex"; - case ISD::TargetJumpTable: return "TargetJumpTable"; - case ISD::TargetConstantPool: return "TargetConstantPool"; - case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; - case ISD::TargetBlockAddress: return "TargetBlockAddress"; - - case ISD::CopyToReg: return "CopyToReg"; - case ISD::CopyFromReg: return "CopyFromReg"; - case ISD::UNDEF: return "undef"; - case ISD::MERGE_VALUES: return "merge_values"; - case ISD::INLINEASM: return "inlineasm"; - case ISD::EH_LABEL: return "eh_label"; - case ISD::HANDLENODE: return "handlenode"; - - // Unary operators - case ISD::FABS: return "fabs"; - case ISD::FNEG: return "fneg"; - case ISD::FSQRT: return "fsqrt"; - case ISD::FSIN: return "fsin"; - case ISD::FCOS: return "fcos"; - case ISD::FTRUNC: return "ftrunc"; - case ISD::FFLOOR: return "ffloor"; - case ISD::FCEIL: return "fceil"; - case ISD::FRINT: return "frint"; - case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::FEXP: return "fexp"; - case ISD::FEXP2: return "fexp2"; - case ISD::FLOG: return "flog"; - case ISD::FLOG2: return "flog2"; - case ISD::FLOG10: return "flog10"; - - // Binary operators - case ISD::ADD: return "add"; - case ISD::SUB: return "sub"; - case ISD::MUL: return "mul"; - case ISD::MULHU: return "mulhu"; - case ISD::MULHS: return "mulhs"; - case ISD::SDIV: return "sdiv"; - case ISD::UDIV: return "udiv"; - case ISD::SREM: return "srem"; - case ISD::UREM: return "urem"; - case ISD::SMUL_LOHI: return "smul_lohi"; - case ISD::UMUL_LOHI: return "umul_lohi"; - case ISD::SDIVREM: return "sdivrem"; - case ISD::UDIVREM: return "udivrem"; - case ISD::AND: return "and"; - case ISD::OR: return "or"; - case ISD::XOR: return "xor"; - case ISD::SHL: return "shl"; - case ISD::SRA: return "sra"; - case ISD::SRL: return "srl"; - case ISD::ROTL: return "rotl"; - case ISD::ROTR: return "rotr"; - case ISD::FADD: return "fadd"; - case ISD::FSUB: return "fsub"; - case ISD::FMUL: return "fmul"; - case ISD::FDIV: return "fdiv"; - case ISD::FMA: return "fma"; - case ISD::FREM: return "frem"; - case ISD::FCOPYSIGN: return "fcopysign"; - case ISD::FGETSIGN: return "fgetsign"; - case ISD::FPOW: return "fpow"; - - case ISD::FPOWI: return "fpowi"; - case ISD::SETCC: return "setcc"; - case ISD::SELECT: return "select"; - case ISD::VSELECT: return "vselect"; - case ISD::SELECT_CC: return "select_cc"; - case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; - case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; - case ISD::CONCAT_VECTORS: return "concat_vectors"; - case ISD::INSERT_SUBVECTOR: return "insert_subvector"; - case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; - case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; - case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; - case ISD::CARRY_FALSE: return "carry_false"; - case ISD::ADDC: return "addc"; - case ISD::ADDE: return "adde"; - case ISD::SADDO: return "saddo"; - case ISD::UADDO: return "uaddo"; - case ISD::SSUBO: return "ssubo"; - case ISD::USUBO: return "usubo"; - case ISD::SMULO: return "smulo"; - case ISD::UMULO: return "umulo"; - case ISD::SUBC: return "subc"; - case ISD::SUBE: return "sube"; - case ISD::SHL_PARTS: return "shl_parts"; - case ISD::SRA_PARTS: return "sra_parts"; - case ISD::SRL_PARTS: return "srl_parts"; - - // Conversion operators. - case ISD::SIGN_EXTEND: return "sign_extend"; - case ISD::ZERO_EXTEND: return "zero_extend"; - case ISD::ANY_EXTEND: return "any_extend"; - case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; - case ISD::TRUNCATE: return "truncate"; - case ISD::FP_ROUND: return "fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; - case ISD::FP_EXTEND: return "fp_extend"; - - case ISD::SINT_TO_FP: return "sint_to_fp"; - case ISD::UINT_TO_FP: return "uint_to_fp"; - case ISD::FP_TO_SINT: return "fp_to_sint"; - case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bitcast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; - - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - - // Control flow instructions - case ISD::BR: return "br"; - case ISD::BRIND: return "brind"; - case ISD::BR_JT: return "br_jt"; - case ISD::BRCOND: return "brcond"; - case ISD::BR_CC: return "br_cc"; - case ISD::CALLSEQ_START: return "callseq_start"; - case ISD::CALLSEQ_END: return "callseq_end"; - - // Other operators - case ISD::LOAD: return "load"; - case ISD::STORE: return "store"; - case ISD::VAARG: return "vaarg"; - case ISD::VACOPY: return "vacopy"; - case ISD::VAEND: return "vaend"; - case ISD::VASTART: return "vastart"; - case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; - case ISD::EXTRACT_ELEMENT: return "extract_element"; - case ISD::BUILD_PAIR: return "build_pair"; - case ISD::STACKSAVE: return "stacksave"; - case ISD::STACKRESTORE: return "stackrestore"; - case ISD::TRAP: return "trap"; - - // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; - case ISD::CTLZ: return "ctlz"; - case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - - // Trampolines - case ISD::INIT_TRAMPOLINE: return "init_trampoline"; - case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; - - case ISD::CONDCODE: - switch (cast<CondCodeSDNode>(this)->get()) { - default: llvm_unreachable("Unknown setcc condition!"); - case ISD::SETOEQ: return "setoeq"; - case ISD::SETOGT: return "setogt"; - case ISD::SETOGE: return "setoge"; - case ISD::SETOLT: return "setolt"; - case ISD::SETOLE: return "setole"; - case ISD::SETONE: return "setone"; - - case ISD::SETO: return "seto"; - case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; - case ISD::SETUGT: return "setugt"; - case ISD::SETUGE: return "setuge"; - case ISD::SETULT: return "setult"; - case ISD::SETULE: return "setule"; - case ISD::SETUNE: return "setune"; - - case ISD::SETEQ: return "seteq"; - case ISD::SETGT: return "setgt"; - case ISD::SETGE: return "setge"; - case ISD::SETLT: return "setlt"; - case ISD::SETLE: return "setle"; - case ISD::SETNE: return "setne"; - - case ISD::SETTRUE: return "settrue"; - case ISD::SETTRUE2: return "settrue2"; - case ISD::SETFALSE: return "setfalse"; - case ISD::SETFALSE2: return "setfalse2"; - } - } -} - -const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { - switch (AM) { - default: - return ""; - case ISD::PRE_INC: - return "<pre-inc>"; - case ISD::PRE_DEC: - return "<pre-dec>"; - case ISD::POST_INC: - return "<post-inc>"; - case ISD::POST_DEC: - return "<post-dec>"; - } -} - -std::string ISD::ArgFlagsTy::getArgFlagsString() { - std::string S = "< "; - - if (isZExt()) - S += "zext "; - if (isSExt()) - S += "sext "; - if (isInReg()) - S += "inreg "; - if (isSRet()) - S += "sret "; - if (isByVal()) - S += "byval "; - if (isNest()) - S += "nest "; - if (getByValAlign()) - S += "byval-align:" + utostr(getByValAlign()) + " "; - if (getOrigAlign()) - S += "orig-align:" + utostr(getOrigAlign()) + " "; - if (getByValSize()) - S += "byval-size:" + utostr(getByValSize()) + " "; - return S + ">"; -} - -void SDNode::dump() const { dump(0); } -void SDNode::dump(const SelectionDAG *G) const { - print(dbgs(), G); - dbgs() << '\n'; -} - -void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; - - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ","; - if (getValueType(i) == MVT::Other) - OS << "ch"; - else - OS << getValueType(i).getEVTString(); - } - OS << " = " << getOperationName(G); -} - -void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { - if (!MN->memoperands_empty()) { - OS << "<"; - OS << "Mem:"; - for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), - e = MN->memoperands_end(); i != e; ++i) { - OS << **i; - if (llvm::next(i) != e) - OS << " "; - } - OS << ">"; - } - } else if (const ShuffleVectorSDNode *SVN = - dyn_cast<ShuffleVectorSDNode>(this)) { - OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { - int Idx = SVN->getMaskElt(i); - if (i) OS << ","; - if (Idx < 0) - OS << "u"; - else - OS << Idx; - } - OS << ">"; - } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { - OS << '<' << CSDN->getAPIntValue() << '>'; - } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) - OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) - OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; - else { - OS << "<APFloat("; - CSDN->getValueAPF().bitcastToAPInt().dump(); - OS << ")>"; - } - } else if (const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(this)) { - int64_t offset = GADN->getOffset(); - OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); - OS << '>'; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = GADN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { - OS << "<" << FIDN->getIndex() << ">"; - } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { - OS << "<" << JTDN->getIndex() << ">"; - if (unsigned int TF = JTDN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ - int offset = CP->getOffset(); - if (CP->isMachineConstantPoolEntry()) - OS << "<" << *CP->getMachineCPVal() << ">"; - else - OS << "<" << *CP->getConstVal() << ">"; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = CP->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { - OS << "<"; - const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); - if (LBB) - OS << LBB->getName() << " "; - OS << (const void*)BBDN->getBasicBlock() << ">"; - } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); - } else if (const ExternalSymbolSDNode *ES = - dyn_cast<ExternalSymbolSDNode>(this)) { - OS << "'" << ES->getSymbol() << "'"; - if (unsigned int TF = ES->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { - if (M->getValue()) - OS << "<" << M->getValue() << ">"; - else - OS << "<null>"; - } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { - if (MD->getMD()) - OS << "<" << MD->getMD() << ">"; - else - OS << "<null>"; - } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getEVTString(); - } - else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << *LD->getMemOperand(); - - bool doExt = true; - switch (LD->getExtensionType()) { - default: doExt = false; break; - case ISD::EXTLOAD: OS << ", anyext"; break; - case ISD::SEXTLOAD: OS << ", sext"; break; - case ISD::ZEXTLOAD: OS << ", zext"; break; - } - if (doExt) - OS << " from " << LD->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(LD->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); - - if (ST->isTruncatingStore()) - OS << ", trunc to " << ST->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(ST->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; - } else if (const BlockAddressSDNode *BA = - dyn_cast<BlockAddressSDNode>(this)) { - OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); - OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); - OS << ">"; - if (unsigned int TF = BA->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } - - if (G) - if (unsigned Order = G->GetOrdering(this)) - OS << " [ORD=" << Order << ']'; - - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; - - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } -} - -void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; - } - print_details(OS, G); -} - -static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, - const SelectionDAG *G, unsigned depth, - unsigned indent) { - if (depth == 0) - return; - - OS.indent(indent); - - N->print(OS, G); - - if (depth < 1) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - // Don't follow chain operands. - if (N->getOperand(i).getValueType() == MVT::Other) - continue; - OS << '\n'; - printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); - } -} - -void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, - unsigned depth) const { - printrWithDepthHelper(OS, this, G, depth, 0); -} - -void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { - // Don't print impossibly deep things. - printrWithDepth(OS, G, 10); -} - -void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { - printrWithDepth(dbgs(), G, depth); -} - -void SDNode::dumprFull(const SelectionDAG *G) const { - // Don't print impossibly deep things. - dumprWithDepth(G, 10); -} - -static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getNode()->hasOneUse()) - DumpNodes(N->getOperand(i).getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - - - dbgs() << "\n"; - dbgs().indent(indent); - N->dump(G); -} - SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6625,74 +6069,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } -void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; - - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) - DumpNodes(N, 2, this); - } - - if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - - dbgs() << "\n\n"; -} - -void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - print_details(OS, G); -} - -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; -static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, - const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. - return; - - // Dump the current SDNode, but don't end the line yet. - OS.indent(indent); - N->printr(OS, G); - - // Having printed this SDNode, walk the children: - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - - if (i) OS << ","; - OS << " "; - - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } - } - - OS << "\n"; - - // Dump children that have grandchildren on their own line(s). - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - DumpNodesr(OS, child, indent+2, G, once); - } -} - -void SDNode::dumpr() const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); -} - -void SDNode::dumpr(const SelectionDAG *G) const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, G, once); -} - - // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4e4aa11..2ac9655 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2411,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 0; i < SI.getNumCases(); ++i) { - BasicBlock *SuccBB = SI.getCaseSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getCaseValue(i), - SI.getCaseValue(i), + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -4561,8 +4561,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; + } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 0000000..f981afb --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,631 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. + case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. + if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. + dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2173d8d..8aabc02 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -673,7 +673,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Scheduling", GroupName, TimePassesIsEnabled); - Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs) Scheduler->viewGraph(); @@ -684,8 +684,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); - FuncInfo->InsertPt = Scheduler->InsertPos; + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to @@ -774,7 +775,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); @@ -934,9 +935,9 @@ static void collectFailStats(const Instruction *I) { case Instruction::FPToSI: NumFastIselFailFPToSI++; return; case Instruction::UIToFP: NumFastIselFailUIToFP++; return; case Instruction::SIToFP: NumFastIselFailSIToFP++; return; - case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; - case Instruction::BitCast: NumFastIselFailBitCast++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; // Other instructions... case Instruction::ICmp: NumFastIselFailICmp++; return; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 5412c97..9a86f32 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -1,4 +1,4 @@ -//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// // // The LLVM Compiler Infrastructure // @@ -42,7 +42,7 @@ STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPass : public FunctionPass { + class SjLjEHPrepare : public FunctionPass { const TargetLowering *TLI; Type *FunctionContextTy; Constant *RegisterFn; @@ -58,7 +58,7 @@ namespace { AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPass(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLowering *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -79,15 +79,15 @@ namespace { }; } // end anonymous namespace -char SjLjEHPass::ID = 0; +char SjLjEHPrepare::ID = 0; -// Public Interface To the SjLjEHPass pass. -FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { - return new SjLjEHPass(TLI); +// Public Interface To the SjLjEHPrepare pass. +FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { + return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process // exceptions. -bool SjLjEHPass::doInitialization(Module &M) { +bool SjLjEHPrepare::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); @@ -123,7 +123,7 @@ bool SjLjEHPass::doInitialization(Module &M) { /// insertCallSiteStore - Insert a store of the call-site value to the /// function context -void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number) { +void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { IRBuilder<> Builder(I); // Get a reference to the call_site field. @@ -151,8 +151,8 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// substituteLPadValues - Substitute the values returned by the landingpad /// instruction with those returned by the personality function. -void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, - Value *SelVal) { +void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); @@ -183,7 +183,7 @@ void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPass:: +Value *SjLjEHPrepare:: setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { BasicBlock *EntryBB = F.begin(); @@ -251,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. -void SjLjEHPass::lowerIncomingArguments(Function &F) { +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) @@ -295,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. -void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst*> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { @@ -393,7 +393,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. -bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { +bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst*, 16> Returns; SmallVector<InvokeInst*, 16> Invokes; SmallSetVector<LandingPadInst*, 16> LPads; @@ -519,7 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { return true; } -bool SjLjEHPass::runOnFunction(Function &F) { +bool SjLjEHPrepare::runOnFunction(Function &F) { bool Res = setupEntryBlockAndCallSites(F); return Res; } |
