author    | Stephen Hines <srhines@google.com> | 2014-02-11 20:01:10 -0800
committer | Stephen Hines <srhines@google.com> | 2014-02-11 20:01:10 -0800
commit    | ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch)
tree      | 2418ee2e96ea220977c8fb74959192036ab5b133 /lib/CodeGen
parent    | c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff)
parent    | dbb832b83351cec97b025b61c26536ef50c3181c (diff)
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
lib/Linker/LinkModules.cpp
lib/Support/Unix/Signals.inc
Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
Diffstat (limited to 'lib/CodeGen')
104 files changed, 9241 insertions, 6519 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index e079707..2ee7767 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -782,7 +782,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; - } else { + } else if (CriticalPathSet.any()) { ExcludeRegs = &CriticalPathSet; } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index ca08b5b..1600c67 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -320,6 +320,7 @@ static const Value *getNoopInput(const Value *V, static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, SmallVectorImpl<unsigned> &RetIndices, SmallVectorImpl<unsigned> &CallIndices, + bool AllowDifferingSizes, const TargetLoweringBase &TLI) { // Trace the sub-value needed by the return value as far back up the graph as @@ -350,7 +351,8 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, // all the bits that are needed by the "ret" have been provided by the "tail // call". FIXME: with sufficiently cunning bit-tracking, we could look through // extensions too. - if (BitsProvided < BitsRequired) + if (BitsProvided < BitsRequired || + (!AllowDifferingSizes && BitsProvided != BitsRequired)) return false; return true; @@ -382,9 +384,8 @@ static bool indexReallyValid(CompositeType *T, unsigned Idx) { /// function again on a finished iterator will repeatedly return /// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty /// aggregate or a non-aggregate -static bool -advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, - SmallVectorImpl<unsigned> &Path) { +static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { // First march back up the tree until we can successfully increment one of the // coordinates in Path. while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) { @@ -454,8 +455,8 @@ static bool firstRealType(Type *Next, /// Set the iterator data-structures to the next non-empty, non-aggregate /// subtype. -bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, - SmallVectorImpl<unsigned> &Path) { +static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { do { if (!advanceToNextLeafType(SubTypes, Path)) return false; @@ -509,6 +510,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, return false; } + return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI); +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { // If the block ends with a void return or unreachable, it doesn't matter // what the call's return type is. if (!Ret || Ret->getNumOperands() == 0) return true; @@ -517,19 +525,38 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // return type is. if (isa<UndefValue>(Ret->getOperand(0))) return true; - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - const Function *F = ExitBB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). 
- removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias)) - return false; + // Make sure the attributes attached to each return are compatible. + AttrBuilder CallerAttrs(F->getAttributes(), + AttributeSet::ReturnIndex); + AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), + AttributeSet::ReturnIndex); + + // Noalias is completely benign as far as calling convention goes, it + // shouldn't affect whether the call is a tail call. + CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + + bool AllowDifferingSizes = true; + if (CallerAttrs.contains(Attribute::ZExt)) { + if (!CalleeAttrs.contains(Attribute::ZExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::ZExt); + CalleeAttrs.removeAttribute(Attribute::ZExt); + } else if (CallerAttrs.contains(Attribute::SExt)) { + if (!CalleeAttrs.contains(Attribute::SExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::SExt); + CalleeAttrs.removeAttribute(Attribute::SExt); + } - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + // If they're still different, there's some facet we don't understand + // (currently only "inreg", but in future who knows). It may be OK but the + // only safe option is to reject the tail call. + if (CallerAttrs != CalleeAttrs) return false; const Value *RetVal = Ret->getOperand(0), *CallVal = I; @@ -571,7 +598,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. - if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI)) + if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, + AllowDifferingSizes, TLI)) return false; CallEmpty = !nextRealType(CallSubTypes, CallPath); diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 188047d..5d82dd9 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -47,13 +47,18 @@ ARMException::ARMException(AsmPrinter *A) ARMException::~ARMException() {} +ARMTargetStreamer &ARMException::getTargetStreamer() { + MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer(); + return static_cast<ARMTargetStreamer &>(TS); +} + void ARMException::EndModule() { } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void ARMException::BeginFunction(const MachineFunction *MF) { - Asm->OutStreamer.EmitFnStart(); + getTargetStreamer().emitFnStart(); if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -62,8 +67,9 @@ void ARMException::BeginFunction(const MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. 
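The Analysis.cpp hunk above threads a new AllowDifferingSizes flag from the return-attribute check into slotOnlyDiscardsData. Below is a minimal standalone sketch of that compatibility rule, assuming nothing from LLVM (RetAttrs and attrsCompatibleForTailCall are invented stand-ins, not LLVM API): noalias is stripped as benign, a zeroext/signext promise by the caller must be matched by the callee and pins the exact bit width, and any leftover mismatch (e.g. inreg) conservatively rejects the tail call.

#include <cassert>
#include <set>
#include <string>

struct RetAttrs {
  std::set<std::string> Attrs; // e.g. {"zeroext"}, {"signext"}, {"inreg"}
  bool contains(const std::string &A) const { return Attrs.count(A) != 0; }
  void remove(const std::string &A) { Attrs.erase(A); }
  bool operator==(const RetAttrs &O) const { return Attrs == O.Attrs; }
};

// Returns true if a tail call is still possible; sets AllowDifferingSizes
// the way returnTypeIsEligibleForTailCall threads it into
// slotOnlyDiscardsData.
bool attrsCompatibleForTailCall(RetAttrs Caller, RetAttrs Callee,
                                bool &AllowDifferingSizes) {
  // noalias never affects the calling convention.
  Caller.remove("noalias");
  Callee.remove("noalias");

  AllowDifferingSizes = true;
  for (const char *Ext : {"zeroext", "signext"}) {
    if (!Caller.contains(Ext))
      continue;
    if (!Callee.contains(Ext))
      return false;              // caller promises an extension the callee skips
    AllowDifferingSizes = false; // the exact bit width now matters
    Caller.remove(Ext);
    Callee.remove(Ext);
  }
  // Any remaining difference (e.g. inreg) is a facet this check doesn't
  // understand; the only safe option is to reject the tail call.
  return Caller == Callee;
}

int main() {
  bool ADS = false;
  assert(attrsCompatibleForTailCall({{"zeroext"}}, {{"zeroext"}}, ADS) && !ADS);
  assert(!attrsCompatibleForTailCall({{"zeroext"}}, {}, ADS));
  assert(attrsCompatibleForTailCall({}, {}, ADS) && ADS);
}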
/// void ARMException::EndFunction() { + ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry()) - Asm->OutStreamer.EmitCantUnwind(); + ATS.emitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -76,13 +82,13 @@ void ARMException::EndFunction() { // Emit references to personality. if (const Function * Personality = MMI->getPersonalities()[MMI->getPersonalityIndex()]) { - MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + MCSymbol *PerSym = Asm->getSymbol(Personality); Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); - Asm->OutStreamer.EmitPersonality(PerSym); + ATS.emitPersonality(PerSym); } // Emit .handlerdata directive. - Asm->OutStreamer.EmitHandlerData(); + ATS.emitHandlerData(); // Emit actual exception table EmitExceptionTable(); @@ -90,7 +96,7 @@ void ARMException::EndFunction() { } } - Asm->OutStreamer.EmitFnEnd(); + ATS.emitFnEnd(); } void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 12c3574..308b0e0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -48,6 +48,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; static const char *const DWARFGroupName = "DWARF Emission"; @@ -94,7 +95,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(ID), - TM(tm), MAI(tm.getMCAsmInfo()), + TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { @@ -164,7 +165,7 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer.InitStreamer(); - Mang = new Mangler(OutContext, &TM); + Mang = new Mangler(&TM); // Allow the target to emit any magic that it wants at the start of the file. 
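The ARMException refactoring above moves the EHABI directives from the generic MCStreamer onto ARMTargetStreamer. As a rough sketch of the directive order those emit* hooks produce for one function (the personality symbol name is a made-up example, not taken from this patch):

#include <cstdio>

void emitARMEHDirectives(bool NeedsUnwindTable, bool HasPersonality) {
  std::puts(".fnstart");                 // emitFnStart(), from BeginFunction
  if (!NeedsUnwindTable) {
    std::puts(".cantunwind");            // emitCantUnwind()
  } else if (HasPersonality) {
    std::puts(".personality __gxx_personality_v0"); // emitPersonality(PerSym)
    std::puts(".handlerdata");           // emitHandlerData()
    std::puts("@ exception table here"); // EmitExceptionTable()
  }
  std::puts(".fnend");                   // emitFnEnd()
}

int main() { emitARMEHDirectives(true, true); }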
EmitStartOfAsmFile(M); @@ -212,12 +213,12 @@ bool AsmPrinter::doInitialization(Module &M) { llvm_unreachable("Unknown exception type."); } -void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { - switch ((GlobalValue::LinkageTypes)Linkage) { +void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + switch (Linkage) { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: @@ -225,8 +226,19 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkOnceODRAutoHideLinkage) + bool CanBeHidden = false; + + if (Linkage == GlobalValue::LinkOnceODRLinkage) { + if (GV->hasUnnamedAddr()) { + CanBeHidden = true; + } else { + GlobalStatus GS; + if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) + CanBeHidden = true; + } + } + + if (!CanBeHidden) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else @@ -239,7 +251,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // .weak _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); } - break; + return; case GlobalValue::DLLExportLinkage: case GlobalValue::AppendingLinkage: // FIXME: appending linkage variables should go into a section of @@ -248,16 +260,23 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // If external or appending, declare as a global symbol. // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - break; + return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: case GlobalValue::LinkerPrivateLinkage: - break; - default: - llvm_unreachable("Unknown linkage type!"); + return; + case GlobalValue::AvailableExternallyLinkage: + llvm_unreachable("Should never emit this"); + case GlobalValue::DLLImportLinkage: + case GlobalValue::ExternalWeakLinkage: + llvm_unreachable("Don't know how to emit these"); } + llvm_unreachable("Unknown linkage type!"); } +MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { + return getObjFileLowering().getSymbol(*Mang, GV); +} /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { @@ -273,7 +292,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } } - MCSymbol *GVSym = Mang->getSymbol(GV); + MCSymbol *GVSym = getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. @@ -284,13 +303,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *TD = TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + const DataLayout *DL = TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). 
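The EmitLinkage rewrite above replaces LinkOnceODRAutoHideLinkage with an on-the-fly CanBeHidden decision. A small sketch of that decision under stand-in types (GlobalInfo is not an LLVM type; analyzeGlobal's "returned true" failure mode is modeled as AnalysisFailed):

#include <cassert>

struct GlobalInfo {
  bool IsLinkOnceODR;
  bool HasUnnamedAddr;   // address explicitly declared insignificant
  bool AnalysisFailed;   // analyzeGlobal gave up
  bool IsCompared;       // address observed by a pointer comparison
};

bool canBeHidden(const GlobalInfo &GV) {
  if (!GV.IsLinkOnceODR)
    return false;
  if (GV.HasUnnamedAddr)
    return true;
  // Fall back to whole-module analysis: hide only if the analysis
  // succeeded and never saw the address being compared.
  return !GV.AnalysisFailed && !GV.IsCompared;
}

int main() {
  GlobalInfo G{true, false, false, false};
  assert(canBeHidden(G));   // never compared: safe to hide
  G.IsCompared = true;
  assert(!canBeHidden(G));  // address matters: keep the plain weak definition
  G.HasUnnamedAddr = true;
  assert(canBeHidden(G));   // unnamed_addr overrides the analysis
}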
- unsigned AlignLog = getGVAlignmentLog2(GV, *TD); + unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + + if (DD) + DD->setSymbolSize(GVSym, Size); // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -388,14 +410,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TLVSect); // Emit the linkage here. - EmitLinkage(GV->getLinkage(), GVSym); + EmitLinkage(GV, GVSym); OutStreamer.EmitLabel(GVSym); // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = TD->getPointerSizeInBits()/8; + unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); OutStreamer.EmitIntValue(0, PtrSize); @@ -407,7 +429,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TheSection); - EmitLinkage(GV->getLinkage(), GVSym); + EmitLinkage(GV, GVSym); EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(GVSym); @@ -433,7 +455,7 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); - EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitLinkage(F, CurrentFnSym); EmitAlignment(MF->getAlignment(), F); if (MAI->hasDotTypeDotSizeDirective()) @@ -459,16 +481,6 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.EmitLabel(DeadBlockSyms[i]); } - // Add some workaround for linkonce linkage on Cygwin\MinGW. - if (MAI->getLinkOnceDirective() != 0 && - (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { - // FIXME: What is this? - MCSymbol *FakeStub = - OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ - CurrentFnSym->getName()); - OutStreamer.EmitLabel(FakeStub); - } - // Emit pre-function debug and/or EH information. if (DE) { NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -478,6 +490,10 @@ void AsmPrinter::EmitFunctionHeader() { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->beginFunction(MF); } + + // Emit the prefix data. + if (F->hasPrefixData()) + EmitGlobalConstant(F->getPrefixData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -530,11 +546,11 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. 
-static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { +void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - AP.OutStreamer.AddComment(Twine("implicit-def: ") + - AP.TM.getRegisterInfo()->getName(RegNo)); - AP.OutStreamer.AddBlankLine(); + OutStreamer.AddComment(Twine("implicit-def: ") + + TM.getRegisterInfo()->getName(RegNo)); + OutStreamer.AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -646,7 +662,7 @@ bool AsmPrinter::needsRelocationsForDwarfStringPool() const { } void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { - MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + const MCSymbol *Label = MI.getOperand(0).getMCSymbol(); if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) return; @@ -657,12 +673,12 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { if (MMI->getCompactUnwindEncoding() != 0) OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); - MachineModuleInfo &MMI = MF->getMMI(); - std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions(); + const MachineModuleInfo &MMI = MF->getMMI(); + const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); bool FoundOne = false; (void)FoundOne; - for (std::vector<MCCFIInstruction>::iterator I = Instructions.begin(), - E = Instructions.end(); I != E; ++I) { + for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(), + E = Instrs.end(); I != E; ++I) { if (I->getLabel() == Label) { emitCFIInstruction(*I); FoundOne = true; @@ -724,7 +740,7 @@ void AsmPrinter::EmitFunctionBody() { } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) emitImplicitDef(II, *this); + if (isVerbose()) emitImplicitDef(II); break; case TargetOpcode::KILL: if (isVerbose()) emitKill(II, *this); @@ -877,7 +893,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (V == GlobalValue::DefaultVisibility) continue; - MCSymbol *Name = Mang->getSymbol(&F); + MCSymbol *Name = getSymbol(&F); EmitVisibility(Name, V, false); } @@ -887,6 +903,9 @@ bool AsmPrinter::doFinalization(Module &M) { if (!ModuleFlags.empty()) getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + // Make sure we wrote out everything we need. + OutStreamer.Flush(); + // Finalize debug and EH information. 
if (DE) { { @@ -914,12 +933,12 @@ bool AsmPrinter::doFinalization(Module &M) { for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); + OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); } for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); + OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); } } @@ -927,14 +946,19 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer.AddBlankLine(); for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - MCSymbol *Name = Mang->getSymbol(I); + MCSymbol *Name = getSymbol(I); const GlobalValue *GV = I->getAliasedGlobal(); - MCSymbol *Target = Mang->getSymbol(GV); + if (GV->isDeclaration()) { + report_fatal_error(Name->getName() + + ": Target doesn't support aliases to declarations"); + } + + MCSymbol *Target = getSymbol(GV); if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); - else if (I->hasWeakLinkage()) + else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); else assert(I->hasLocalLinkage() && "Invalid alias linkage"); @@ -953,6 +977,9 @@ bool AsmPrinter::doFinalization(Module &M) { if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) MP->finishAssembly(*this); + // Emit llvm.ident metadata in an '.ident' directive. + EmitModuleIdents(M); + // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); @@ -976,7 +1003,7 @@ bool AsmPrinter::doFinalization(Module &M) { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. - CurrentFnSym = Mang->getSymbol(MF.getFunction()); + CurrentFnSym = getSymbol(MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; if (isVerbose()) @@ -1283,16 +1310,10 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip); + OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } -typedef std::pair<unsigned, Constant*> Structor; - -static bool priority_order(const Structor& lhs, const Structor& rhs) { - return lhs.first < rhs.first; -} - /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { @@ -1309,6 +1330,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). // Gather the structors in a form that's convenient for sorting by priority. 
+ typedef std::pair<unsigned, Constant *> Structor; SmallVector<Structor, 8> Structors; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i)); @@ -1322,9 +1344,9 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *TD = TM.getDataLayout(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); - std::stable_sort(Structors.begin(), Structors.end(), priority_order); + const DataLayout *DL = TM.getDataLayout(); + unsigned Align = Log2_32(DL->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), less_first()); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { const MCSection *OutputSection = (isCtor ? @@ -1337,6 +1359,21 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } } +void AsmPrinter::EmitModuleIdents(Module &M) { + if (!MAI->hasIdentDirective()) + return; + + if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.ident metadata entry can have only one operand"); + const MDString *S = cast<MDString>(N->getOperand(0)); + OutStreamer.EmitIdent(S->getString()); + } + } +} + //===--------------------------------------------------------------------===// // Emission and print routines // @@ -1402,12 +1439,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, OutContext); if (!MAI->hasSetDirective()) - OutStreamer.EmitValue(Diff, 4); + OutStreamer.EmitValue(Diff, Size); else { // Otherwise, emit with .set (aka assignment). MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, 4); + OutStreamer.EmitSymbolValue(SetLabel, Size); } } @@ -1415,9 +1452,9 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size) + unsigned Size, bool IsSectionRelative) const { - if (MAI->needsDwarfSectionOffsetDirective() && Size == 4) { // secrel32 ONLY works for 32bits. 
+ if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { OutStreamer.EmitCOFFSecRel32(Label); return; } @@ -1468,7 +1505,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return MCConstantExpr::Create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); @@ -1498,10 +1535,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address - APInt OffsetAI(TD.getPointerSizeInBits(), 0); - cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); + APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); if (!OffsetAI) @@ -1522,17 +1559,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), false/*ZExt*/); return lowerConstant(Op, AP); } case Instruction::PtrToInt: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -1542,13 +1579,13 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. - if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) return OpExpr; // Otherwise the pointer is smaller than the resultant integer, mask off // the high bits so we are sure to get a proper truncation if the input is // a constant expr. 
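The EmitXXStructorList hunk above drops the file-scope priority_order predicate in favor of llvm::less_first with std::stable_sort. A minimal sketch of that ordering, with std::string standing in for the Constant* body: less_first compares only the pair's first member, and equal-priority entries keep their IR order because the sort is stable.

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  typedef std::pair<unsigned, std::string> Structor;
  std::vector<Structor> Structors = {
      {65535, "init_b"}, {101, "early_init"}, {65535, "init_a"}};

  std::stable_sort(Structors.begin(), Structors.end(),
                   [](const Structor &L, const Structor &R) {
                     return L.first < R.first; // what less_first() does
                   });

  for (const Structor &S : Structors)
    std::printf("%u %s\n", S.first, S.second.c_str());
  // early_init runs first; init_b stays ahead of init_a (stable).
}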
- unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } @@ -1699,9 +1736,9 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } } - const DataLayout &TD = *AP.TM.getDataLayout(); - unsigned Size = TD.getTypeAllocSize(CDS->getType()); - unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + const DataLayout &DL = *AP.TM.getDataLayout(); + unsigned Size = DL.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) AP.OutStreamer.EmitZeros(Padding); @@ -1727,9 +1764,9 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &TD = *AP.TM.getDataLayout(); - unsigned Size = TD.getTypeAllocSize(CV->getType()); - unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * + const DataLayout &DL = *AP.TM.getDataLayout(); + unsigned Size = DL.getTypeAllocSize(CV->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) AP.OutStreamer.EmitZeros(Padding); @@ -1737,15 +1774,15 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *TD = AP.TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(CS->getType()); - const StructLayout *Layout = TD->getStructLayout(CS->getType()); + const DataLayout *DL = AP.TM.getDataLayout(); + unsigned Size = DL->getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); // Check if padding is needed and insert one or more 0s. - uint64_t FieldSize = TD->getTypeAllocSize(Field->getType()); + uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; @@ -1802,13 +1839,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { } // Emit the tail padding for the long double. - const DataLayout &TD = *AP.TM.getDataLayout(); - AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType())); + const DataLayout &DL = *AP.TM.getDataLayout(); + AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - + DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -1825,7 +1862,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Big endian: // * Record the extra bits to emit. // * Realign the raw data to emit the chunks of 64-bits. 
- if (TD->isBigEndian()) { + if (DL->isBigEndian()) { // Basically the structure of the raw data is a chunk of 64-bits cells: // 0 1 BitWidth / 64 // [chunk1][chunk2] ... [chunkN]. @@ -1846,7 +1883,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // quantities at a time. const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; AP.OutStreamer.EmitIntValue(Val, 8); } @@ -1864,8 +1901,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { } static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(CV->getType()); + const DataLayout *DL = AP.TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(CV->getType()); if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size); @@ -1913,7 +1950,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, TD); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) return emitGlobalConstantImpl(New, AP); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index c141d60..b92f49c 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -185,5 +185,11 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpOffset: OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); break; + case MCCFIInstruction::OpRegister: + OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + break; + case MCCFIInstruction::OpWindowSave: + OutStreamer.EmitCFIWindowSave(); + break; } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index d8e9c95..4f927f6 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -123,7 +123,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, TM.getTargetCPU(), TM.getTargetFeatureString())); OwningPtr<MCTargetAsmParser> - TAP(TM.getTarget().createMCAsmParser(*STI, *Parser)); + TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 65e7bee..be484a6 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp + DIEHash.cpp DwarfAccelTable.cpp DwarfCFIException.cpp DwarfCompileUnit.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index ab03861..6944428 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -34,8 +34,10 @@ using namespace llvm; /// Profile - Used to gather unique data for the abbreviation folding set. 
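emitGlobalConstantLargeInt above reverses the 64-bit word order on big-endian targets because the raw words are stored least-significant first, as in APInt. A standalone illustration of just that word ordering (each word's own byte order is handled by the streamer, which this sketch does not model):

#include <cstdint>
#include <cstdio>

void emitWideInt(const uint64_t *RawData, unsigned NumWords, bool BigEndian) {
  for (unsigned i = 0; i != NumWords; ++i) {
    uint64_t Val = BigEndian ? RawData[NumWords - i - 1] : RawData[i];
    std::printf(".quad 0x%016llx\n", (unsigned long long)Val);
  }
}

int main() {
  // 128-bit value 0x00010002_00030004_00050006_00070008, low word first.
  const uint64_t Words[2] = {0x0005000600070008ULL, 0x0001000200030004ULL};
  std::puts("# little-endian target:");
  emitWideInt(Words, 2, false);
  std::puts("# big-endian target:");
  emitWideInt(Words, 2, true);
}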
/// void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Attribute); - ID.AddInteger(Form); + // Explicitly cast to an integer type for which FoldingSetNodeID has + // overloads. Otherwise MSVC 2010 thinks this call is ambiguous. + ID.AddInteger(unsigned(Attribute)); + ID.AddInteger(unsigned(Form)); } //===----------------------------------------------------------------------===// @@ -45,7 +47,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { /// Profile - Used to gather unique data for the abbreviation folding set. /// void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Tag); + ID.AddInteger(unsigned(Tag)); ID.AddInteger(ChildrenFlag); // For each attribute description. @@ -112,17 +114,25 @@ DIE::~DIE() { /// Climb up the parent chain to get the compile unit DIE to which this DIE /// belongs. -DIE *DIE::getCompileUnit() { - DIE *p = this; +const DIE *DIE::getCompileUnit() const { + const DIE *Cu = getCompileUnitOrNull(); + assert(Cu && "We should not have orphaned DIEs."); + return Cu; +} + +/// Climb up the parent chain to get the compile unit DIE this DIE belongs +/// to. Return NULL if DIE is not added to an owner yet. +const DIE *DIE::getCompileUnitOrNull() const { + const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) return p; p = p->getParent(); } - llvm_unreachable("We should not have orphaned DIEs."); + return NULL; } -DIEValue *DIE::findAttribute(unsigned Attribute) { +DIEValue *DIE::findAttribute(uint16_t Attribute) { const SmallVectorImpl<DIEValue *> &Values = getValues(); const DIEAbbrev &Abbrevs = getAbbrev(); @@ -199,14 +209,14 @@ void DIEValue::dump() const { /// EmitValue - Emit integer of appropriate size. /// -void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { +void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { unsigned Size = ~0U; switch (Form) { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? if (Asm->OutStreamer.hasRawTextSupport()) - Asm->OutStreamer.EmitRawText(StringRef("")); + Asm->OutStreamer.EmitRawText(""); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -231,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// SizeOf - Determine size of integer value in bytes. /// -unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru @@ -266,13 +276,13 @@ void DIEInteger::print(raw_ostream &O) const { /// EmitValue - Emit expression value. /// -void DIEExpr::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form)); } /// SizeOf - Determine size of expression value in bytes. /// -unsigned DIEExpr::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -292,13 +302,16 @@ void DIEExpr::print(raw_ostream &O) const { /// EmitValue - Emit label value. 
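The DIE.cpp change above splits the parent-chain walk into an asserting getCompileUnit and a null-returning getCompileUnitOrNull. A compilable sketch of the walk, with a stand-in Node type in place of DIE:

#include <cassert>

enum Tag { TagCompileUnit, TagSubprogram, TagVariable };

struct Node {
  Tag T;
  const Node *Parent;
};

const Node *getCompileUnitOrNull(const Node *N) {
  for (const Node *P = N; P; P = P->Parent)
    if (P->T == TagCompileUnit)
      return P;
  return nullptr; // orphaned: not added to an owner yet
}

int main() {
  Node CU{TagCompileUnit, nullptr};
  Node Fn{TagSubprogram, &CU};
  Node Var{TagVariable, &Fn};
  assert(getCompileUnitOrNull(&Var) == &CU);
  Node Orphan{TagVariable, nullptr};
  assert(getCompileUnitOrNull(&Orphan) == nullptr);
}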
/// -void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->EmitLabelReference(Label, SizeOf(AP, Form)); +void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelReference(Label, SizeOf(AP, Form), + Form == dwarf::DW_FORM_strp || + Form == dwarf::DW_FORM_sec_offset || + Form == dwarf::DW_FORM_ref_addr); } /// SizeOf - Determine size of label value in bytes. /// -unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; @@ -317,13 +330,13 @@ void DIELabel::print(raw_ostream &O) const { /// EmitValue - Emit delta value. /// -void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; return AP->getDataLayout().getPointerSize(); @@ -341,13 +354,13 @@ void DIEDelta::print(raw_ostream &O) const { /// EmitValue - Emit string value. /// -void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { Access->EmitValue(AP, Form); } /// SizeOf - Determine size of delta value in bytes. /// -unsigned DIEString::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { return Access->SizeOf(AP, Form); } @@ -364,7 +377,7 @@ void DIEString::print(raw_ostream &O) const { /// EmitValue - Emit debug information entry offset. /// -void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const { +void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitInt32(Entry->getOffset()); } @@ -402,7 +415,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { /// EmitValue - Emit block data. /// -void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { +void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { switch (Form) { default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; @@ -418,7 +431,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// SizeOf - Determine size of block data in bytes. /// -unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { +unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { switch (Form) { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index bfd7d1d..f4fa326 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -33,17 +33,17 @@ namespace llvm { class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - uint16_t Attribute; + dwarf::Attribute Attribute; /// Form - Dwarf form code. /// - uint16_t Form; + dwarf::Form Form; public: - DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} + DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} // Accessors. 
- uint16_t getAttribute() const { return Attribute; } - uint16_t getForm() const { return Form; } + dwarf::Attribute getAttribute() const { return Attribute; } + dwarf::Form getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -56,7 +56,7 @@ namespace llvm { class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - uint16_t Tag; + dwarf::Tag Tag; /// ChildrenFlag - Dwarf children flag. /// @@ -71,20 +71,19 @@ namespace llvm { SmallVector<DIEAbbrevData, 12> Data; public: - DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. - uint16_t getTag() const { return Tag; } + dwarf::Tag getTag() const { return Tag; } unsigned getNumber() const { return Number; } uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setTag(uint16_t T) { Tag = T; } void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(uint16_t Attribute, uint16_t Form) { + void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } @@ -131,19 +130,17 @@ namespace llvm { /// SmallVector<DIEValue*, 12> Values; -#ifndef NDEBUG - // Private data for print() - mutable unsigned IndentCount; -#endif public: explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {} + : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), + Parent(0) {} virtual ~DIE(); // Accessors. DIEAbbrev &getAbbrev() { return Abbrev; } + const DIEAbbrev &getAbbrev() const { return Abbrev; } unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } - unsigned getTag() const { return Abbrev.getTag(); } + dwarf::Tag getTag() const { return Abbrev.getTag(); } unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } const std::vector<DIE *> &getChildren() const { return Children; } @@ -151,14 +148,17 @@ namespace llvm { DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile unit DIE this DIE belongs /// to. - DIE *getCompileUnit(); - void setTag(unsigned Tag) { Abbrev.setTag(Tag); } + const DIE *getCompileUnit() const; + /// Similar to getCompileUnit, returns null when DIE is not added to an + /// owner yet. + const DIE *getCompileUnitOrNull() const; void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } /// addValue - Add a value and attributes to a DIE. /// - void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + void addValue(dwarf::Attribute Attribute, dwarf::Form Form, + DIEValue *Value) { Abbrev.AddAttribute(Attribute, Form); Values.push_back(Value); } @@ -166,10 +166,7 @@ namespace llvm { /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { - if (Child->getParent()) { - assert (Child->getParent() == this && "Unexpected DIE Parent!"); - return; - } + assert(!Child->getParent()); Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); Children.push_back(Child); Child->Parent = this; @@ -177,7 +174,7 @@ namespace llvm { /// findAttribute - Find a value in the DIE with the attribute given, returns NULL /// if no such attribute exists. 
- DIEValue *findAttribute(unsigned Attribute); + DIEValue *findAttribute(uint16_t Attribute); #ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; @@ -213,11 +210,11 @@ namespace llvm { /// EmitValue - Emit value via the Dwarf writer. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; /// SizeOf - Return the size of a value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; #ifndef NDEBUG virtual void print(raw_ostream &O) const = 0; @@ -235,7 +232,7 @@ namespace llvm { /// BestForm - Choose the best form for integer. /// - static unsigned BestForm(bool IsSigned, uint64_t Int) { + static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { const int64_t SignedInt = Int; if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; @@ -251,13 +248,13 @@ namespace llvm { /// EmitValue - Emit integer of appropriate size. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; uint64_t getValue() const { return Integer; } /// SizeOf - Determine size of integer value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *I) { return I->getType() == isInteger; } @@ -277,7 +274,7 @@ namespace llvm { /// EmitValue - Emit expression value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// getValue - Get MCExpr. /// @@ -285,7 +282,7 @@ namespace llvm { /// SizeOf - Determine size of expression value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isExpr; } @@ -305,7 +302,7 @@ namespace llvm { /// EmitValue - Emit label value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// getValue - Get MCSymbol. /// @@ -313,7 +310,7 @@ namespace llvm { /// SizeOf - Determine size of label value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *L) { return L->getType() == isLabel; } @@ -335,11 +332,11 @@ namespace llvm { /// EmitValue - Emit delta value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of delta value in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *D) { return D->getType() == isDelta; } @@ -365,11 +362,11 @@ namespace llvm { /// EmitValue - Emit delta value. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of delta value in bytes. 
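DIEInteger::BestForm above picks the narrowest DWARF data form that round-trips the value. The same selection written with fixed-width casts instead of char/short/int (Form here is a stand-in enum, not the dwarf::Form type):

#include <cassert>
#include <cstdint>

enum Form { Data1, Data2, Data4, Data8 };

Form bestForm(bool IsSigned, uint64_t Int) {
  if (IsSigned) {
    const int64_t S = (int64_t)Int;
    if ((int8_t)Int == S)  return Data1;
    if ((int16_t)Int == S) return Data2;
    if ((int32_t)Int == S) return Data4;
  } else {
    if ((uint8_t)Int == Int)  return Data1;
    if ((uint16_t)Int == Int) return Data2;
    if ((uint32_t)Int == Int) return Data4;
  }
  return Data8;
}

int main() {
  assert(bestForm(false, 0x7f) == Data1);
  assert(bestForm(true, (uint64_t)-1) == Data1);  // -1 fits in one signed byte
  assert(bestForm(false, (uint64_t)-1) == Data8); // 0xFFFF... does not
  assert(bestForm(false, 300) == Data2);
}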
/// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *D) { return D->getType() == isString; } @@ -394,13 +391,13 @@ namespace llvm { /// EmitValue - Emit debug information entry offset. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of debug information entry in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const { - return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) : - sizeof(int32_t); + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) + : sizeof(int32_t); } /// Returns size of a ref_addr entry. @@ -420,9 +417,7 @@ namespace llvm { class DIEBlock : public DIEValue, public DIE { unsigned Size; // Size in bytes excluding size header. public: - DIEBlock() - : DIEValue(isBlock), DIE(0), Size(0) {} - virtual ~DIEBlock() {} + DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} /// ComputeSize - calculate the size of the block. /// @@ -430,7 +425,7 @@ namespace llvm { /// BestForm - Choose the best form for data. /// - unsigned BestForm() const { + dwarf::Form BestForm() const { if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; @@ -439,11 +434,11 @@ namespace llvm { /// EmitValue - Emit block data. /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; /// SizeOf - Determine size of block data in bytes. /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; // Implement isa/cast/dyncast. static bool classof(const DIEValue *E) { return E->getType() == isBlock; } diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp new file mode 100644 index 0000000..95eca90 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -0,0 +1,507 @@ +//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DIEHash.h" + +#include "DIE.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. +static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const DIEAbbrev &Abbrevs = Die.getAbbrev(); + + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return an empty string. 
+ for (size_t i = 0; i < Values.size(); ++i) { + if (Abbrevs.getData()[i].getAttribute() == Attr) { + DIEValue *V = Values[i]; + assert(isa<DIEString>(V) && "String requested. Not a string."); + DIEString *S = cast<DIEString>(V); + return S->getString(); + } + } + return StringRef(""); +} + +/// \brief Adds the string in \p Str to the hash. This also hashes +/// a trailing NULL with the string. +void DIEHash::addString(StringRef Str) { + DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + Hash.update(Str); + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +// FIXME: The LEB128 routines are copied and only slightly modified out of +// LEB128.h. + +/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128. +void DIEHash::addULEB128(uint64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (Value != 0); +} + +void DIEHash::addSLEB128(int64_t Value) { + DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + bool More; + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || + ((Value == -1) && ((Byte & 0x40) != 0)))); + if (More) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (More); +} + +/// \brief Including \p Parent adds the context of Parent to the hash.. +void DIEHash::addParentContext(const DIE &Parent) { + + DEBUG(dbgs() << "Adding parent context to hash...\n"); + + // [7.27.2] For each surrounding type or namespace beginning with the + // outermost such construct... + SmallVector<const DIE *, 1> Parents; + const DIE *Cur = &Parent; + while (Cur->getTag() != dwarf::DW_TAG_compile_unit) { + Parents.push_back(Cur); + Cur = Cur->getParent(); + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + const DIE &Die = **I; + + // ... Append the letter "C" to the sequence... + addULEB128('C'); + + // ... Followed by the DWARF tag of the construct... + addULEB128(Die.getTag()); + + // ... Then the name, taken from the DW_AT_name attribute. + StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + DEBUG(dbgs() << "... adding context: " << Name << "\n"); + if (!Name.empty()) + addString(Name); + } +} + +// Collect all of the attributes for a particular DIE in single structure. 
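addULEB128/addSLEB128 above are the standard LEB128 encodings, only redirected into the MD5 stream. Self-contained versions that append to a byte vector instead, with the classic check that 624485 encodes as E5 8E 26:

#include <cassert>
#include <cstdint>
#include <vector>

void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift keeps the sign, as in the code above
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

int main() {
  std::vector<uint8_t> U, S;
  encodeULEB128(624485, U);
  assert(U.size() == 3 && U[0] == 0xE5 && U[1] == 0x8E && U[2] == 0x26);
  encodeSLEB128(-2, S);
  assert(S.size() == 1 && S[0] == 0x7E); // -2 fits in a single byte
}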
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { + const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); + const DIEAbbrev &Abbrevs = Die.getAbbrev(); + +#define COLLECT_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME.Val = Values[i]; \ + Attrs.NAME.Desc = &Abbrevs.getData()[i]; \ + break + + for (size_t i = 0, e = Values.size(); i != e; ++i) { + DEBUG(dbgs() << "Attribute: " + << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) + << " added.\n"); + switch (Abbrevs.getData()[i].getAttribute()) { + COLLECT_ATTR(DW_AT_name); + COLLECT_ATTR(DW_AT_accessibility); + COLLECT_ATTR(DW_AT_address_class); + COLLECT_ATTR(DW_AT_allocated); + COLLECT_ATTR(DW_AT_artificial); + COLLECT_ATTR(DW_AT_associated); + COLLECT_ATTR(DW_AT_binary_scale); + COLLECT_ATTR(DW_AT_bit_offset); + COLLECT_ATTR(DW_AT_bit_size); + COLLECT_ATTR(DW_AT_bit_stride); + COLLECT_ATTR(DW_AT_byte_size); + COLLECT_ATTR(DW_AT_byte_stride); + COLLECT_ATTR(DW_AT_const_expr); + COLLECT_ATTR(DW_AT_const_value); + COLLECT_ATTR(DW_AT_containing_type); + COLLECT_ATTR(DW_AT_count); + COLLECT_ATTR(DW_AT_data_bit_offset); + COLLECT_ATTR(DW_AT_data_location); + COLLECT_ATTR(DW_AT_data_member_location); + COLLECT_ATTR(DW_AT_decimal_scale); + COLLECT_ATTR(DW_AT_decimal_sign); + COLLECT_ATTR(DW_AT_default_value); + COLLECT_ATTR(DW_AT_digit_count); + COLLECT_ATTR(DW_AT_discr); + COLLECT_ATTR(DW_AT_discr_list); + COLLECT_ATTR(DW_AT_discr_value); + COLLECT_ATTR(DW_AT_encoding); + COLLECT_ATTR(DW_AT_enum_class); + COLLECT_ATTR(DW_AT_endianity); + COLLECT_ATTR(DW_AT_explicit); + COLLECT_ATTR(DW_AT_is_optional); + COLLECT_ATTR(DW_AT_location); + COLLECT_ATTR(DW_AT_lower_bound); + COLLECT_ATTR(DW_AT_mutable); + COLLECT_ATTR(DW_AT_ordering); + COLLECT_ATTR(DW_AT_picture_string); + COLLECT_ATTR(DW_AT_prototyped); + COLLECT_ATTR(DW_AT_small); + COLLECT_ATTR(DW_AT_segment); + COLLECT_ATTR(DW_AT_string_length); + COLLECT_ATTR(DW_AT_threads_scaled); + COLLECT_ATTR(DW_AT_upper_bound); + COLLECT_ATTR(DW_AT_use_location); + COLLECT_ATTR(DW_AT_use_UTF8); + COLLECT_ATTR(DW_AT_variable_parameter); + COLLECT_ATTR(DW_AT_virtuality); + COLLECT_ATTR(DW_AT_visibility); + COLLECT_ATTR(DW_AT_vtable_elem_location); + COLLECT_ATTR(DW_AT_type); + default: + break; + } + } +} + +void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute, + const DIE &Entry, StringRef Name) { + // append the letter 'N' + addULEB128('N'); + + // the DWARF attribute code (DW_AT_type or DW_AT_friend), + addULEB128(Attribute); + + // the context of the tag, + if (const DIE *Parent = Entry.getParent()) + addParentContext(*Parent); + + // the letter 'E', + addULEB128('E'); + + // and the name of the type. + addString(Name); + + // Currently DW_TAG_friends are not used by Clang, but if they do become so, + // here's the relevant spec text to implement: + // + // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram, + // the context is omitted and the name to be used is the ABI-specific name + // of the subprogram (e.g., the mangled linker name). 
+} + +void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber) { + // a) If T is in the list of [previously hashed types], use the letter + // 'R' as the marker + addULEB128('R'); + + addULEB128(Attribute); + + // and use the unsigned LEB128 encoding of [the index of T in the + // list] as the attribute value; + addULEB128(DieNumber); +} + +void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry) { + assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend " + "tags. Add support here when there's " + "a use case"); + // Step 5 + // If the tag in Step 3 is one of [the below tags] + if ((Tag == dwarf::DW_TAG_pointer_type || + Tag == dwarf::DW_TAG_reference_type || + Tag == dwarf::DW_TAG_rvalue_reference_type || + Tag == dwarf::DW_TAG_ptr_to_member_type) && + // and the referenced type (via the [below attributes]) + // FIXME: This seems overly restrictive, and causes hash mismatches + // there's a decl/def difference in the containing type of a + // ptr_to_member_type, but it's what DWARF says, for some reason. + Attribute == dwarf::DW_AT_type) { + // ... has a DW_AT_name attribute, + StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name); + if (!Name.empty()) { + hashShallowTypeReference(Attribute, Entry, Name); + return; + } + } + + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + hashRepeatedTypeReference(Attribute, DieNumber); + return; + } + + // otherwise, b) use the letter 'T' as a the marker, ... + addULEB128('T'); + + addULEB128(Attribute); + + // ... process the type T recursively by performing Steps 2 through 7, and + // use the result as the attribute value. + DieNumber = Numbering.size(); + computeHash(Entry); +} + +// Hash an individual attribute \param Attr based on the type of attribute and +// the form. +void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { + const DIEValue *Value = Attr.Val; + const DIEAbbrevData *Desc = Attr.Desc; + dwarf::Attribute Attribute = Desc->getAttribute(); + + // 7.27 Step 3 + // ... An attribute that refers to another type entry T is processed as + // follows: + if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) { + hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry()); + return; + } + + // Other attribute values use the letter 'A' as the marker, ... + addULEB128('A'); + + addULEB128(Attribute); + + // ... and the value consists of the form code (encoded as an unsigned LEB128 + // value) followed by the encoding of the value according to the form code. To + // ensure reproducibility of the signature, the set of forms used in the + // signature computation is limited to the following: DW_FORM_sdata, + // DW_FORM_flag, DW_FORM_string, and DW_FORM_block. + switch (Desc->getForm()) { + case dwarf::DW_FORM_string: + llvm_unreachable( + "Add support for DW_FORM_string if we ever start emitting them again"); + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strp: + addULEB128(dwarf::DW_FORM_string); + addString(cast<DIEString>(Value)->getString()); + break; + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_udata: + addULEB128(dwarf::DW_FORM_sdata); + addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + default: + llvm_unreachable("Add support for additional forms"); + } +} + +// Go through the attributes from \param Attrs in the order specified in 7.27.4 +// and hash them. 
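hashDIEEntry above distinguishes three cases with single-letter markers: 'N' plus context and name for a named target, 'R' plus an index for a type already in the numbering map (which is what keeps mutually referential types from recursing forever), and 'T' plus a full recursive hash on first visit. A toy model of that scheme, appending readable tokens to a string instead of hashing (TypeNode stands in for DIE, and the root is pre-numbered 1 as in computeCUSignature/computeTypeSignature):

#include <cassert>
#include <map>
#include <string>
#include <vector>

struct TypeNode {
  std::string Name;             // empty: unnamed type
  std::vector<TypeNode *> Refs; // referenced types (DW_AT_type edges)
};

void hashType(TypeNode *T, std::map<TypeNode *, unsigned> &Numbering,
              std::string &Out) {
  for (TypeNode *Ref : T->Refs) {
    if (!Ref->Name.empty()) {          // step 5: shallow by-name reference
      Out += "N(" + Ref->Name + ")";
      continue;
    }
    unsigned &Num = Numbering[Ref];
    if (Num) {                         // already visited: back-reference
      Out += "R" + std::to_string(Num);
      continue;
    }
    Num = (unsigned)Numbering.size();  // first visit: number it, then recurse
    Out += "T";
    hashType(Ref, Numbering, Out);
  }
}

int main() {
  TypeNode A{"", {}}, B{"", {}};
  A.Refs.push_back(&B);
  B.Refs.push_back(&A); // mutual references: a cycle
  std::map<TypeNode *, unsigned> Numbering;
  Numbering[&A] = 1;
  std::string Out;
  hashType(&A, Numbering, Out);
  assert(Out == "TR1"); // B hashed once, then A referenced back by number
}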
+void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
+#define ADD_ATTR(ATTR)                                                         \
+  {                                                                            \
+    if (ATTR.Val != 0)                                                         \
+      hashAttribute(ATTR, Tag);                                                \
+  }
+
+  ADD_ATTR(Attrs.DW_AT_name);
+  ADD_ATTR(Attrs.DW_AT_accessibility);
+  ADD_ATTR(Attrs.DW_AT_address_class);
+  ADD_ATTR(Attrs.DW_AT_allocated);
+  ADD_ATTR(Attrs.DW_AT_artificial);
+  ADD_ATTR(Attrs.DW_AT_associated);
+  ADD_ATTR(Attrs.DW_AT_binary_scale);
+  ADD_ATTR(Attrs.DW_AT_bit_offset);
+  ADD_ATTR(Attrs.DW_AT_bit_size);
+  ADD_ATTR(Attrs.DW_AT_bit_stride);
+  ADD_ATTR(Attrs.DW_AT_byte_size);
+  ADD_ATTR(Attrs.DW_AT_byte_stride);
+  ADD_ATTR(Attrs.DW_AT_const_expr);
+  ADD_ATTR(Attrs.DW_AT_const_value);
+  ADD_ATTR(Attrs.DW_AT_containing_type);
+  ADD_ATTR(Attrs.DW_AT_count);
+  ADD_ATTR(Attrs.DW_AT_data_bit_offset);
+  ADD_ATTR(Attrs.DW_AT_data_location);
+  ADD_ATTR(Attrs.DW_AT_data_member_location);
+  ADD_ATTR(Attrs.DW_AT_decimal_scale);
+  ADD_ATTR(Attrs.DW_AT_decimal_sign);
+  ADD_ATTR(Attrs.DW_AT_default_value);
+  ADD_ATTR(Attrs.DW_AT_digit_count);
+  ADD_ATTR(Attrs.DW_AT_discr);
+  ADD_ATTR(Attrs.DW_AT_discr_list);
+  ADD_ATTR(Attrs.DW_AT_discr_value);
+  ADD_ATTR(Attrs.DW_AT_encoding);
+  ADD_ATTR(Attrs.DW_AT_enum_class);
+  ADD_ATTR(Attrs.DW_AT_endianity);
+  ADD_ATTR(Attrs.DW_AT_explicit);
+  ADD_ATTR(Attrs.DW_AT_is_optional);
+  ADD_ATTR(Attrs.DW_AT_location);
+  ADD_ATTR(Attrs.DW_AT_lower_bound);
+  ADD_ATTR(Attrs.DW_AT_mutable);
+  ADD_ATTR(Attrs.DW_AT_ordering);
+  ADD_ATTR(Attrs.DW_AT_picture_string);
+  ADD_ATTR(Attrs.DW_AT_prototyped);
+  ADD_ATTR(Attrs.DW_AT_small);
+  ADD_ATTR(Attrs.DW_AT_segment);
+  ADD_ATTR(Attrs.DW_AT_string_length);
+  ADD_ATTR(Attrs.DW_AT_threads_scaled);
+  ADD_ATTR(Attrs.DW_AT_upper_bound);
+  ADD_ATTR(Attrs.DW_AT_use_location);
+  ADD_ATTR(Attrs.DW_AT_use_UTF8);
+  ADD_ATTR(Attrs.DW_AT_variable_parameter);
+  ADD_ATTR(Attrs.DW_AT_virtuality);
+  ADD_ATTR(Attrs.DW_AT_visibility);
+  ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
+  ADD_ATTR(Attrs.DW_AT_type);
+
+  // FIXME: Add the extended attributes.
+}
+
+// Add all of the attributes for \p Die to the hash.
+void DIEHash::addAttributes(const DIE &Die) {
+  DIEAttrs Attrs = {};
+  collectAttributes(Die, Attrs);
+  hashAttributes(Attrs, Die.getTag());
+}
+
+void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
+  // 7.27 Step 7
+  // ... append the letter 'S',
+  addULEB128('S');
+
+  // the tag of C,
+  addULEB128(Die.getTag());
+
+  // and the name.
+  addString(Name);
+}
+
+// Compute the hash of a DIE. This is based on the type signature computation
+// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
+// flattened description of the DIE.
+void DIEHash::computeHash(const DIE &Die) {
+  // Append the letter 'D', followed by the DWARF tag of the DIE.
+  addULEB128('D');
+  addULEB128(Die.getTag());
+
+  // Add each of the attributes of the DIE.
+  addAttributes(Die);
+
+  // Then hash each of the children of the DIE.
+  for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(),
+                                          E = Die.getChildren().end();
+       I != E; ++I) {
+    // 7.27 Step 7
+    // If C is a nested type entry or a member function entry, ...
+    if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) {
+      StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name);
+      // ... and has a DW_AT_name attribute
+      if (!Name.empty()) {
+        hashNestedType(**I, Name);
+        continue;
+      }
+    }
+    computeHash(**I);
+  }
+
+  // Following the last child (or if there are no children), append a zero byte.
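To make the flattening concrete, here is a hypothetical, self-contained sketch of the byte stream computeHash would feed to the hash for a childless DW_TAG_base_type named "int". The DWARF constants are written as literals here, and only DW_AT_name, DW_AT_byte_size, and DW_AT_encoding participate, in the hashAttributes order above:

#include <cstdint>
#include <vector>

static std::vector<uint8_t> flattenIntBaseType() {
  std::vector<uint8_t> S;
  // Every value below fits in one LEB128 byte, and small non-negative values
  // encode identically as ULEB128 and SLEB128, so one helper suffices.
  auto leb = [&S](uint8_t V) { S.push_back(V); };
  leb('D'); leb(0x24);                       // 'D' marker, DW_TAG_base_type
  leb('A'); leb(0x03); leb(0x08);            // 'A', DW_AT_name, DW_FORM_string
  for (char C : {'i', 'n', 't', '\0'})       // the name bytes plus the NUL
    S.push_back(static_cast<uint8_t>(C));
  leb('A'); leb(0x0b); leb(0x0d); leb(4);    // DW_AT_byte_size as sdata 4
  leb('A'); leb(0x3e); leb(0x0d); leb(0x05); // DW_AT_encoding as sdata DW_ATE_signed
  S.push_back(0);                            // no children: the zero byte
  return S;
}

The MD5 of this stream, truncated as in the signature routines that follow, is the type's hash.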
+ Hash.update(makeArrayRef((uint8_t)'\0')); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE +/// with the exception that we are hashing only the context and the name of the +/// type. +uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) { + + // Add the contexts to the hash. We won't be computing the ODR hash for + // function local types so it's safe to use the generic context hashing + // algorithm here. + // FIXME: If we figure out how to account for linkage in some way we could + // actually do this with a slight modification to the parent hash algorithm. + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Add the current DIE information. + + // Add the DWARF tag of the DIE. + addULEB128(Die.getTag()); + + // Add the name of the type to the hash. + addString(getDIEStringAttr(Die, dwarf::DW_AT_name)); + + // Now get the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of the full CU and all top level CU entities. +// TODO: Initialize the type chain at 0 instead of 1 for CU signatures. +uint64_t DIEHash::computeCUSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of additional forms not specifically called out in the +/// standard. +uint64_t DIEHash::computeTypeSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return *reinterpret_cast<support::ulittle64_t *>(Result + 8); +} diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h new file mode 100644 index 0000000..f0c4ef9 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -0,0 +1,147 @@ +//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. 
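One note on the extraction step that all three compute*Signature routines above share: the 16-byte MD5 digest is truncated to its last eight bytes, read little-endian. A standalone sketch of just that step, mirroring the llvm::MD5 calls above but assembling the bytes by hand instead of through support::ulittle64_t:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/MD5.h"
#include <cstdint>

// Hash a byte buffer and return bytes 8..15 of the MD5 digest as a
// little-endian 64-bit value, matching the routines above.
static uint64_t signatureOf(llvm::ArrayRef<uint8_t> Bytes) {
  llvm::MD5 Hash;
  Hash.update(Bytes);
  llvm::MD5::MD5Result Result;
  Hash.final(Result);
  uint64_t Sig = 0;
  for (int i = 7; i >= 0; --i) // Result[8] ends up least significant
    Sig = (Sig << 8) | Result[8 + i];
  return Sig;
}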
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+class CompileUnit;
+
+/// \brief An object containing the capability of hashing and adding hash
+/// attributes onto a DIE.
+class DIEHash {
+  // The entry for a particular attribute.
+  struct AttrEntry {
+    const DIEValue *Val;
+    const DIEAbbrevData *Desc;
+  };
+
+  // Collection of all attributes used in hashing a particular DIE.
+  struct DIEAttrs {
+    AttrEntry DW_AT_name;
+    AttrEntry DW_AT_accessibility;
+    AttrEntry DW_AT_address_class;
+    AttrEntry DW_AT_allocated;
+    AttrEntry DW_AT_artificial;
+    AttrEntry DW_AT_associated;
+    AttrEntry DW_AT_binary_scale;
+    AttrEntry DW_AT_bit_offset;
+    AttrEntry DW_AT_bit_size;
+    AttrEntry DW_AT_bit_stride;
+    AttrEntry DW_AT_byte_size;
+    AttrEntry DW_AT_byte_stride;
+    AttrEntry DW_AT_const_expr;
+    AttrEntry DW_AT_const_value;
+    AttrEntry DW_AT_containing_type;
+    AttrEntry DW_AT_count;
+    AttrEntry DW_AT_data_bit_offset;
+    AttrEntry DW_AT_data_location;
+    AttrEntry DW_AT_data_member_location;
+    AttrEntry DW_AT_decimal_scale;
+    AttrEntry DW_AT_decimal_sign;
+    AttrEntry DW_AT_default_value;
+    AttrEntry DW_AT_digit_count;
+    AttrEntry DW_AT_discr;
+    AttrEntry DW_AT_discr_list;
+    AttrEntry DW_AT_discr_value;
+    AttrEntry DW_AT_encoding;
+    AttrEntry DW_AT_enum_class;
+    AttrEntry DW_AT_endianity;
+    AttrEntry DW_AT_explicit;
+    AttrEntry DW_AT_is_optional;
+    AttrEntry DW_AT_location;
+    AttrEntry DW_AT_lower_bound;
+    AttrEntry DW_AT_mutable;
+    AttrEntry DW_AT_ordering;
+    AttrEntry DW_AT_picture_string;
+    AttrEntry DW_AT_prototyped;
+    AttrEntry DW_AT_small;
+    AttrEntry DW_AT_segment;
+    AttrEntry DW_AT_string_length;
+    AttrEntry DW_AT_threads_scaled;
+    AttrEntry DW_AT_upper_bound;
+    AttrEntry DW_AT_use_location;
+    AttrEntry DW_AT_use_UTF8;
+    AttrEntry DW_AT_variable_parameter;
+    AttrEntry DW_AT_virtuality;
+    AttrEntry DW_AT_visibility;
+    AttrEntry DW_AT_vtable_elem_location;
+    AttrEntry DW_AT_type;
+
+    // Insert any additional ones here...
+  };
+
+public:
+  /// \brief Computes the ODR signature.
+  uint64_t computeDIEODRSignature(const DIE &Die);
+
+  /// \brief Computes the CU signature.
+  uint64_t computeCUSignature(const DIE &Die);
+
+  /// \brief Computes the type signature.
+  uint64_t computeTypeSignature(const DIE &Die);
+
+  // Helper routines to process parts of a DIE.
+private:
+  /// \brief Adds the parent context of \p Die to the hash.
+  void addParentContext(const DIE &Die);
+
+  /// \brief Adds the attributes of \p Die to the hash.
+  void addAttributes(const DIE &Die);
+
+  /// \brief Computes the full DWARF4 7.27 hash of the DIE.
+  void computeHash(const DIE &Die);
+
+  // Routines that add DIEValues to the hash.
+private:
+  /// \brief Encodes and adds \p Value to the hash as a ULEB128.
+  void addULEB128(uint64_t Value);
+
+  /// \brief Encodes and adds \p Value to the hash as a SLEB128.
+  void addSLEB128(int64_t Value);
+
+  /// \brief Adds \p Str to the hash and includes a NULL byte.
+  void addString(StringRef Str);
+
+  /// \brief Collects the attributes of DIE \p Die into the \p Attrs
+  /// structure.
+  void collectAttributes(const DIE &Die, DIEAttrs &Attrs);
+
+  /// \brief Hashes the attributes in \p Attrs in order.
+  void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
+
+  /// \brief Hashes an individual attribute.
+  void hashAttribute(AttrEntry Attr, dwarf::Tag Tag);
+
+  /// \brief Hashes an attribute that refers to another DIE.
+  void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+                    const DIE &Entry);
+
+  /// \brief Hashes a reference to a named type in a way that is independent
+  /// of whether that type is described by a declaration or a definition.
+  void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
+                                StringRef Name);
+
+  /// \brief Hashes a reference to a previously referenced type DIE.
+  void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber);
+
+  void hashNestedType(const DIE &Die, StringRef Name);
+
+private:
+  MD5 Hash;
+  DenseMap<const DIE *, unsigned> Numbering;
+};
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index a82a149..689aeda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -24,27 +24,14 @@
 using namespace llvm;
 
-const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
-  switch (AT) {
-  case eAtomTypeNULL: return "eAtomTypeNULL";
-  case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
-  case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
-  case eAtomTypeTag: return "eAtomTypeTag";
-  case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
-  case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
-  }
-  llvm_unreachable("invalid AtomType!");
-}
-
 // The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
-DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
-  Header(8 + (atomList.size() * 4)),
-  HeaderData(atomList),
-  Entries(Allocator) { }
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
+    : Header(8 + (atomList.size() * 4)), HeaderData(atomList),
+      Entries(Allocator) {}
 
-DwarfAccelTable::~DwarfAccelTable() { }
+DwarfAccelTable::~DwarfAccelTable() {}
 
-void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) {
   assert(Data.empty() && "Already finalized!");
   // If the string is in the list already then add this die to the list
   // otherwise add a new one.
@@ -59,13 +46,16 @@ void DwarfAccelTable::ComputeBucketCount(void) {
     uniques[i] = Data[i]->HashValue;
   array_pod_sort(uniques.begin(), uniques.end());
   std::vector<uint32_t>::iterator p =
-    std::unique(uniques.begin(), uniques.end());
+      std::unique(uniques.begin(), uniques.end());
   uint32_t num = std::distance(uniques.begin(), p);
 
   // Then compute the bucket size, minimum of 1 bucket.
-  if (num > 1024) Header.bucket_count = num/4;
-  if (num > 16) Header.bucket_count = num/2;
-  else Header.bucket_count = num > 0 ? num : 1;
+  if (num > 1024)
+    Header.bucket_count = num / 4;
+  else if (num > 16)
+    Header.bucket_count = num / 2;
+  else
+    Header.bucket_count = num > 0 ? num : 1;
 
   Header.hashes_count = num;
 }
@@ -78,13 +68,13 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
 
 void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
   // Create the individual hash data outputs.
-  for (StringMap<DataArray>::iterator
-         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+  for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
+       EI != EE; ++EI) {
 
     // Unique the entries.
std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), - EI->second.end()); + EI->second.end()); HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); @@ -126,7 +116,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { Asm->EmitInt32(HeaderData.Atoms.size()); for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { Atom A = HeaderData.Atoms[i]; - Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type)); Asm->EmitInt16(A.type); Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); Asm->EmitInt16(A.form); @@ -152,7 +142,8 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); Asm->EmitInt32((*HI)->HashValue); } @@ -166,13 +157,13 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); MCContext &Context = Asm->OutStreamer.getContext(); - const MCExpr *Sub = - MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), - MCSymbolRefExpr::Create(SecBegin, Context), - Context); + const MCExpr *Sub = MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), Context); Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); } } @@ -185,7 +176,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) { + HE = Buckets[i].end(); + HI != HE; ++HI) { // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); @@ -193,8 +185,9 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { D->getStringPoolSym()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); - for (ArrayRef<HashDataContents*>::const_iterator - DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + for (ArrayRef<HashDataContents *>::const_iterator + DI = (*HI)->Data.begin(), + DE = (*HI)->Data.end(); DI != DE; ++DI) { // Emit the DIE offset Asm->EmitInt32((*DI)->Die->getOffset()); @@ -214,8 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, - DwarfUnits *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) { // Emit the header. 
EmitHeader(Asm); @@ -239,11 +231,12 @@ void DwarfAccelTable::print(raw_ostream &O) { HeaderData.print(O); O << "Entries: \n"; - for (StringMap<DataArray>::const_iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + for (StringMap<DataArray>::const_iterator EI = Entries.begin(), + EE = Entries.end(); + EI != EE; ++EI) { O << "Name: " << EI->getKeyData() << "\n"; for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); + DE = EI->second.end(); DI != DE; ++DI) (*DI)->print(O); } @@ -251,14 +244,14 @@ void DwarfAccelTable::print(raw_ostream &O) { O << "Buckets and Hashes: \n"; for (size_t i = 0, e = Buckets.size(); i < e; ++i) for (HashList::const_iterator HI = Buckets[i].begin(), - HE = Buckets[i].end(); HI != HE; ++HI) + HE = Buckets[i].end(); + HI != HE; ++HI) (*HI)->print(O); O << "Data: \n"; - for (std::vector<HashData*>::const_iterator - DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) - (*DI)->print(O); - - + for (std::vector<HashData *>::const_iterator DI = Data.begin(), + DE = Data.end(); + DI != DE; ++DI) + (*DI)->print(O); } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 3ef1dc5..7627313 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -67,11 +67,7 @@ class DwarfUnits; class DwarfAccelTable { - enum HashFunctionType { - eHashFunctionDJB = 0u - }; - - static uint32_t HashDJB (StringRef Str) { + static uint32_t HashDJB(StringRef Str) { uint32_t h = 5381; for (unsigned i = 0, e = Str.size(); i != e; ++i) h = ((h << 5) + h) + Str[i]; @@ -80,25 +76,25 @@ class DwarfAccelTable { // Helper function to compute the number of buckets needed based on // the number of unique hashes. - void ComputeBucketCount (void); + void ComputeBucketCount(void); struct TableHeader { - uint32_t magic; // 'HASH' magic value to allow endian detection - uint16_t version; // Version number. - uint16_t hash_function; // The hash function enumeration that was used. - uint32_t bucket_count; // The number of buckets in this hash table. - uint32_t hashes_count; // The total number of unique hash values - // and hash data offsets in this table. - uint32_t header_data_len; // The bytes to skip to get to the hash - // indexes (buckets) for correct alignment. + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. // Also written to disk is the implementation specific header data. 
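HashDJB above is Bernstein's classic "times 33" string hash, which the Apple accelerator table format identifies with the DW_hash_function_djb constant that the reworked TableHeader below records. A freestanding sketch with no LLVM dependencies:

#include <cstdint>
#include <string>

// h starts at 5381; each character folds in as h * 33 + c, with the
// multiplication by 33 spelled as (h << 5) + h.
static uint32_t hashDJB(const std::string &Str) {
  uint32_t H = 5381;
  for (unsigned char C : Str)
    H = ((H << 5) + H) + C;
  return H;
}

The format then places each name into a bucket by hash modulo bucket_count, which is why ComputeBucketCount sizes the table from the number of unique hash values.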
static const uint32_t MagicHash = 0x48415348; - TableHeader (uint32_t data_len) : - magic (MagicHash), version (1), hash_function (eHashFunctionDJB), - bucket_count (0), hashes_count (0), header_data_len (data_len) - {} + TableHeader(uint32_t data_len) + : magic(MagicHash), version(1), + hash_function(dwarf::DW_hash_function_djb), bucket_count(0), + hashes_count(0), header_data_len(data_len) {} #ifndef NDEBUG void print(raw_ostream &O) { @@ -124,62 +120,38 @@ public: // uint32_t die_offset_base // uint32_t atom_count // atom_count Atoms - enum AtomType { - eAtomTypeNULL = 0u, - eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding - eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that - // contains the item in question - eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as - // DW_FORM_data1 (if no tags exceed 255) or - // DW_FORM_data2. - eAtomTypeNameFlags = 4u, // Flags from enum NameFlags - eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags - }; - - enum TypeFlags { - eTypeFlagClassMask = 0x0000000fu, - - // Always set for C++, only set for ObjC if this is the - // @implementation for a class. - eTypeFlagClassIsImplementation = ( 1u << 1 ) - }; // Make these public so that they can be used as a general interface to // the class. struct Atom { - AtomType type; // enum AtomType + uint16_t type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - Atom(AtomType type, uint16_t form) : type(type), form(form) {} - static const char * AtomTypeString(enum AtomType); + Atom(uint16_t type, uint16_t form) : type(type), form(form) {} #ifndef NDEBUG void print(raw_ostream &O) { - O << "Type: " << AtomTypeString(type) << "\n" + O << "Type: " << dwarf::AtomTypeString(type) << "\n" << "Form: " << dwarf::FormEncodingString(form) << "\n"; } - void dump() { - print(dbgs()); - } + void dump() { print(dbgs()); } #endif }; - private: +private: struct TableHeaderData { uint32_t die_offset_base; SmallVector<Atom, 1> Atoms; TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) - : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { } + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} #ifndef NDEBUG - void print (raw_ostream &O) { + void print(raw_ostream &O) { O << "die_offset_base: " << die_offset_base << "\n"; for (size_t i = 0; i < Atoms.size(); i++) Atoms[i].print(O); } - void dump() { - print(dbgs()); - } + void dump() { print(dbgs()); } #endif }; @@ -193,37 +165,38 @@ public: // HashData[hash_data_count] public: struct HashDataContents { - DIE *Die; // Offsets + DIE *Die; // Offsets char Flags; // Specific flags to output - HashDataContents(DIE *D, char Flags) : - Die(D), - Flags(Flags) { } - #ifndef NDEBUG + HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {} +#ifndef NDEBUG void print(raw_ostream &O) const { O << " Offset: " << Die->getOffset() << "\n"; O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; O << " Flags: " << Flags << "\n"; } - #endif +#endif }; + private: struct HashData { StringRef Str; uint32_t HashValue; MCSymbol *Sym; - ArrayRef<HashDataContents*> Data; // offsets - HashData(StringRef S, ArrayRef<HashDataContents*> Data) - : Str(S), Data(Data) { + ArrayRef<HashDataContents *> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents *> Data) + : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } - #ifndef NDEBUG +#ifndef NDEBUG void print(raw_ostream &O) { O << "Name: " << Str << "\n"; O << " Hash Value: " << format("0x%x", HashValue) << "\n"; - O << " 
Symbol: " ; - if (Sym) Sym->print(O); - else O << "<none>"; + O << " Symbol: "; + if (Sym) + Sym->print(O); + else + O << "<none>"; O << "\n"; for (size_t i = 0; i < Data.size(); i++) { O << " Offset: " << Data[i]->Die->getOffset() << "\n"; @@ -231,14 +204,12 @@ private: O << " Flags: " << Data[i]->Flags << "\n"; } } - void dump() { - print(dbgs()); - } - #endif + void dump() { print(dbgs()); } +#endif }; - DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); @@ -253,24 +224,24 @@ private: // Output Variables TableHeader Header; TableHeaderData HeaderData; - std::vector<HashData*> Data; + std::vector<HashData *> Data; // String Data - typedef std::vector<HashDataContents*> DataArray; - typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; + typedef std::vector<HashDataContents *> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets - typedef std::vector<HashData*> HashList; + typedef std::vector<HashData *> HashList; typedef std::vector<HashList> BucketList; BucketList Buckets; HashList Hashes; // Public Implementation - public: +public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); - void AddName(StringRef, DIE*, char = 0); + void AddName(StringRef, DIE *, char = 0); void FinalizeTable(AsmPrinter *, StringRef); void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); #ifndef NDEBUG @@ -278,6 +249,5 @@ private: void dump() { print(dbgs()); } #endif }; - } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index fec5ced..8918f3d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -68,7 +68,7 @@ void DwarfCFIException::EndModule() { for (size_t i = 0, e = Personalities.size(); i != e; ++i) { if (!Personalities[i]) continue; - MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + MCSymbol *Sym = Asm->getSymbol(Personalities[i]); TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); AtLeastOne = true; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index df8ca17..a6ff953 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -22,8 +22,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -33,12 +33,12 @@ using namespace llvm; /// CompileUnit - Compile unit constructor. 
-CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, const MDNode *N, +CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) - : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0), DebugInfoOffset(0) { + : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU), + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); - insertDIE(N, D); + insertDIE(Node, D); } /// ~CompileUnit - Destructor for compile unit. @@ -57,7 +57,7 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { /// getDefaultLowerBound - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t CompileUnit::getDefaultLowerBound() const { - switch (Language) { + switch (getLanguage()) { default: break; @@ -98,32 +98,71 @@ int64_t CompileUnit::getDefaultLowerBound() const { return -1; } +/// Check whether the DIE for this MDNode can be shared across CUs. +static bool isShareableAcrossCUs(DIDescriptor D) { + // When the MDNode can be part of the type system, the DIE can be + // shared across CUs. + return D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition()); +} + +/// getDIE - Returns the debug information entry map slot for the +/// specified debug variable. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +DIE *CompileUnit::getDIE(DIDescriptor D) const { + if (isShareableAcrossCUs(D)) + return DD->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DD->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + /// addFlag - Add a flag that is true. -void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { - if (!DD->useDarwinGDBCompat()) - Die->addValue(Attribute, dwarf::DW_FORM_flag_present, - DIEIntegerOne); +void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); else - addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); + Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } /// addUInt - Add an unsigned integer attribute data and value. /// -void CompileUnit::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); +void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) + DIEInteger(Integer); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); } /// addSInt - Add an signed integer attribute data and value. 
/// -void CompileUnit::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); +void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); } /// addString - Add a string attribute data and value. We always emit a @@ -131,9 +170,10 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// more predictable sizes. In the case of split dwarf we emit an index /// into another table which gets us the static offset into the string /// table. -void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { +void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { DIEValue *Value; - unsigned Form; + dwarf::Form Form; if (!DD->useSplitDwarf()) { MCSymbol *Symb = DU->getStringPoolEntry(String); if (Asm->needsRelocationsForDwarfStringPool()) @@ -154,7 +194,7 @@ void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { /// addLocalString - Add a string attribute data and value. This is guaranteed /// to be in the local string pool instead of indirected. -void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, +void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, StringRef String) { MCSymbol *Symb = DU->getStringPoolEntry(String); DIEValue *Value; @@ -169,25 +209,32 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, /// addExpr - Add a Dwarf expression attribute data and value. /// -void CompileUnit::addExpr(DIE *Die, unsigned Attribute, unsigned Form, - const MCExpr *Expr) { +void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); - Die->addValue(Attribute, Form, Value); + Die->addValue((dwarf::Attribute)0, Form, Value); } /// addLabel - Add a Dwarf label attribute data and value. /// -void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { +void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, + dwarf::Form Form, const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); Die->addValue(Attribute, Form, Value); } +void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form, + const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, +void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + if (!DD->useSplitDwarf()) { if (Label != NULL) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); @@ -206,34 +253,60 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. 
/// -void CompileUnit::addOpAddress(DIE *Die, const MCSymbol *Sym) { +void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { + DD->addArangeLabel(SymbolCU(this, Sym)); if (!DD->useSplitDwarf()) { - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); } else { - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - addUInt(Die, 0, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); } } /// addDelta - Add a label delta attribute data and value. /// -void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { +void CompileUnit::addDelta(DIE *Die, dwarf::Attribute Attribute, + dwarf::Form Form, const MCSymbol *Hi, + const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); Die->addValue(Attribute, Form, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); + addDIEEntry(Die, Attribute, createDIEEntry(Entry)); +} + +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die->getCompileUnitOrNull(); + const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull(); + if (!DieCU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + DieCU = getCUDie(); + if (!EntryCU) + EntryCU = getCUDie(); + Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 + : dwarf::DW_FORM_ref_addr, + Entry); +} + +/// Create a DIE with the given Tag, add the DIE to its parent, and +/// call insertDIE if MD is not null. +DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { + DIE *Die = new DIE(Tag); + Parent.addChild(Die); + if (N) + insertDIE(N, Die); + return Die; } /// addBlock - Add block data. /// -void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. 
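The form selection in addDIEEntry above is the crux of cross-CU references: DW_FORM_ref4 is a four-byte offset relative to the containing compile unit, so it can only reach DIEs in that unit, while DW_FORM_ref_addr is an offset from the start of the whole .debug_info section and can reach any unit. A sketch of just that decision, with a hypothetical opaque CUHandle standing in for the getCompileUnitOrNull comparison:

#include "llvm/Support/Dwarf.h"

// Hypothetical: CUHandle stands in for a DIE's owning compile unit; null
// means "not linked yet" and is treated as the current unit, as above.
typedef const void *CUHandle;

llvm::dwarf::Form pickReferenceForm(CUHandle DieCU, CUHandle EntryCU,
                                    CUHandle CurrentCU) {
  if (!DieCU)
    DieCU = CurrentCU;
  if (!EntryCU)
    EntryCU = CurrentCU;
  // Same unit: the cheap CU-relative form; otherwise section-relative.
  return DieCU == EntryCU ? llvm::dwarf::DW_FORM_ref4
                          : llvm::dwarf::DW_FORM_ref_addr;
}

This keeps the common intra-unit case at the cheaper four-byte form and only pays for ref_addr when a DIE is genuinely shared across units.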
@@ -250,12 +323,12 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { unsigned Line = V.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -268,11 +341,11 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -287,11 +360,11 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), - SP.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -304,11 +377,11 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { unsigned Line = Ty.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), - Ty.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -325,8 +398,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), File.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -341,11 +414,11 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { return; StringRef FN = NS.getFilename(); - unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, 
dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addVariableAddress - Add DW_AT_location attribute for a @@ -362,38 +435,38 @@ void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, } /// addRegisterOp - Add register operand. -void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { +void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } } /// addRegisterOffset - Add register offset. -void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, +void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (Reg == TRI->getFrameRegister(*Asm->MF)) // If variable offset is based in frame register then use fbreg. - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); else if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } - addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); + addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); } /// addAddress - Add an address attribute to a die based on the location /// provided. -void CompileUnit::addAddress(DIE *Die, unsigned Attribute, +void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, bool Indirect) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -402,12 +475,12 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, else { addRegisterOffset(Block, Location.getReg(), Location.getOffset()); if (Indirect && !Location.isReg()) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// addComplexAddress - Start with the address based on the location provided, @@ -416,7 +489,7 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, /// the starting location. Add the DWARF information to the die. 
/// void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, - unsigned Attribute, + dwarf::Attribute Attribute, const MachineLocation &Location) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); unsigned N = DV.getNumAddrElements(); @@ -429,23 +502,23 @@ void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, i = 2; } else addRegisterOp(Block, Location.getReg()); - } - else + } else addRegisterOffset(Block, Location.getReg(), Location.getOffset()); - for (;i < N; ++i) { + for (; i < N; ++i) { uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else + llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -509,44 +582,41 @@ void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, /// more information, read large comment just above here. /// void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, - unsigned Attribute, + dwarf::Attribute Attribute, const MachineLocation &Location) { DIType Ty = DV.getType(); DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); + uint16_t Tag = Ty.getTag(); bool isPointer = false; StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); + DIDerivedType DTy(Ty); + TmpTy = resolve(DTy.getTypeDerivedFrom()); isPointer = true; } - DICompositeType blockStruct = DICompositeType(TmpTy); + DICompositeType blockStruct(TmpTy); // Find the __forwarding field and the variable field in the __Block_byref // struct. DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); + DIDerivedType varField; + DIDerivedType forwardingField; for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Fields.getElement(i)); StringRef fieldName = DT.getName(); if (fieldName == "__forwarding") - forwardingField = Element; + forwardingField = DT; else if (fieldName == varName) - varField = Element; + varField = DT; } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; + unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; + unsigned varFieldOffset = varField.getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -560,45 +630,91 @@ void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). 
if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// isTypeSigned - Return true if the type is signed. -static bool isTypeSigned(DIType Ty, int *SizeInBits) { +static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { if (Ty.isDerivedType()) - return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits); + return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), + SizeInBits); if (Ty.isBasicType()) - if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed - || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || + DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { *SizeInBits = Ty.getSizeInBits(); return true; } return false; } +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { + DIDerivedType DTy(Ty); + if (DTy.isDerivedType()) + return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); + + DIBasicType BTy(Ty); + if (BTy.isBasicType()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_boolean) + return true; + } + return false; +} + +/// If this type is derived from a base type then return base type size. +static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { + unsigned Tag = Ty.getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return Ty.getSizeInBits(); + + DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + + // If this type is not derived from any type then take conservative approach. + if (!BaseType.isValid()) + return Ty.getSizeInBits(); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. 
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type || + BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty.getSizeInBits(); + + if (BaseType.isDerivedType()) + return getBaseTypeSize(DD, DIDerivedType(BaseType)); + + return BaseType.getSizeInBits(); +} + /// addConstantValue - Add constant value entry in variable DIE. void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { @@ -606,32 +722,47 @@ void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, // their maximum bit width which is a bit unfortunate (& doesn't prefer // udata/sdata over dataN as suggested by the DWARF spec) assert(MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; - bool SignedConstant = isTypeSigned(Ty, &SizeInBits); - unsigned Form = SignedConstant ? dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata; - switch (SizeInBits) { - case 8: Form = dwarf::DW_FORM_data1; break; - case 16: Form = dwarf::DW_FORM_data2; break; - case 32: Form = dwarf::DW_FORM_data4; break; - case 64: Form = dwarf::DW_FORM_data8; break; - default: break; + bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); + dwarf::Form Form; + + // If we're a signed constant definitely use sdata. + if (SignedConstant) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); + return; } - SignedConstant ? addSInt(Block, 0, Form, MO.getImm()) - : addUInt(Block, 0, Form, MO.getImm()); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + // Else use data for now unless it's larger than we can deal with. + switch (SizeInBits) { + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; + default: + Form = dwarf::DW_FORM_udata; + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); + return; + } + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); } /// addConstantFPValue - Add constant value entry in variable DIE. void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); + assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); // Get the raw data form of the floating point. const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); + const char *FltPtr = (const char *)FltVal.getRawData(); int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getDataLayout().isLittleEndian(); @@ -641,15 +772,15 @@ void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); + addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addConstantFPValue - Add constant value entry in variable DIE. void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { - addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); + // Pass this down to addConstantValue as an unsigned bag of bits. 
+ addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. @@ -662,19 +793,34 @@ void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { - unsigned form = 0; + // If we're a signed constant definitely use sdata. + if (!Unsigned) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Val.getSExtValue()); + return; + } + + // Else use data for now unless it's larger than we can deal with. + dwarf::Form Form; switch (CIBitWidth) { - case 8: form = dwarf::DW_FORM_data1; break; - case 16: form = dwarf::DW_FORM_data2; break; - case 32: form = dwarf::DW_FORM_data4; break; - case 64: form = dwarf::DW_FORM_data8; break; + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; default: - form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + Val.getZExtValue()); + return; } - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); + addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); return; } @@ -693,10 +839,10 @@ void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { c = Ptr64[i / 8] >> (8 * (i & 7)); else c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); - addUInt(Block, 0, dwarf::DW_FORM_data1, c); + addUInt(Block, dwarf::DW_FORM_data1, c); } - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addTemplateParams - Add template parameters into buffer. @@ -705,47 +851,48 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { DIDescriptor Element = TParams.getElement(i); if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); + constructTemplateTypeParameterDIE(Buffer, + DITemplateTypeParameter(Element)); else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); + constructTemplateValueParameterDIE(Buffer, + DITemplateValueParameter(Element)); } } /// getOrCreateContextDIE - Get context owner's DIE. -DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { +DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { + if (!Context || Context.isFile()) + return getCUDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); - else if (Context.isNameSpace()) + if (Context.isNameSpace()) return getOrCreateNameSpace(DINameSpace(Context)); - else if (Context.isSubprogram()) + if (Context.isSubprogram()) return getOrCreateSubprogramDIE(DISubprogram(Context)); - else - return getDIE(Context); -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (DIE *ContextDIE = getOrCreateContextDIE(Context)) - ContextDIE->addChild(Die); - else - addDie(Die); + return getDIE(Context); } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. 
DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { - DIType Ty(TyNode); - if (!Ty.isType()) + if (!TyNode) return NULL; + + DIType Ty(TyNode); + assert(Ty.isType()); + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext())); + assert(ContextDIE); + DIE *TyDIE = getDIE(Ty); if (TyDIE) return TyDIE; // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - insertDIE(Ty, TyDIE); + TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + if (Ty.isBasicType()) constructTypeDIE(*TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) @@ -762,28 +909,24 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DICompositeType CT(Ty); // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || - CT.isObjcClassComplete(); + IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); } - unsigned Flags = IsImplementation ? - DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); } - addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { - if (!Ty.isType()) - return; +void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { + assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); return; } @@ -793,28 +936,105 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); // If this is a complete composite type then include it in the // list of global types. addGlobalType(Ty); } +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. +void CompileUnit::addAccelName(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNames[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { + DU->getStringPoolEntry(Name); + std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); +} + +/// addGlobalName - Add a new global name to the compile unit. 
+void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = Die; +} + /// addGlobalType - Add a new global type to the compile unit. /// void CompileUnit::addGlobalType(DIType Ty) { - DIDescriptor Context = Ty.getContext(); - if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() - && (!Context || Context.isCompileUnit() || Context.isFile() - || Context.isNameSpace())) - if (DIEEntry *Entry = getDIEEntry(Ty)) - GlobalTypes[Ty.getName()] = Entry->getEntry(); + DIScope Context = resolve(Ty.getContext()); + if (!Ty.getName().empty() && !Ty.isForwardDecl() && + (!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace())) + if (DIEEntry *Entry = getDIEEntry(Ty)) { + std::string FullName = + getParentContextString(Context) + Ty.getName().str(); + GlobalTypes[FullName] = Entry->getEntry(); + } +} + +/// getParentContextString - Walks the metadata parent chain in a language +/// specific manner (using the compile unit language) and returns +/// it as a string. This is done at the metadata level because DIEs may +/// not currently have been added to the parent context and walking the +/// DIEs looking for names is more expensive than walking the metadata. +std::string CompileUnit::getParentContextString(DIScope Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + return ""; + + std::string CS; + SmallVector<DIScope, 1> Parents; + while (!Context.isCompileUnit()) { + Parents.push_back(Context); + if (Context.getContext()) + Context = resolve(Context.getContext()); + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIScope Ctx = *I; + StringRef Name = Ctx.getName(); + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; } -/// addPubTypes - Add type for pubtypes section. +/// addPubTypes - Add subprogram argument types for pubtypes section. void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) return; @@ -835,18 +1055,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, Name); - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { - Buffer.setTag(dwarf::DW_TAG_unspecified_type); - // An unspecified type only has a name attribute. + // An unspecified type only has a name attribute. + if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) return; - } - Buffer.setTag(dwarf::DW_TAG_base_type); addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. @@ -854,16 +1071,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. 
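The scope walk in getParentContextString collects enclosing scopes innermost-first and then emits them in reverse, so a type B nested in namespace A yields the prefix "A::B::". A simplified sketch under the assumption of a plain parent-pointer chain (Scope is hypothetical; the real code walks DIScope metadata and stops at the compile unit):

#include <string>
#include <vector>

struct Scope {
  std::string Name;
  Scope *Parent; // null at the outermost level
};

std::string parentContextString(Scope *Ctx) {
  std::vector<Scope *> Parents;
  for (; Ctx; Ctx = Ctx->Parent) // innermost to outermost
    Parents.push_back(Ctx);

  std::string CS;
  // Reverse iterate so the outermost construct comes first.
  for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I)
    if (!(*I)->Name.empty()) {
      CS += (*I)->Name;
      CS += "::";
    }
  return CS; // the caller appends the entity's own name
}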
StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); + DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + if (FromTy) + addType(&Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) @@ -871,11 +1084,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DTy.getClassType())); + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(resolve(DTy.getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) addSourceLine(&Buffer, DTy); @@ -883,20 +1096,20 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { /// Return true if the type is appropriately scoped to be contained inside /// its own type unit. -static bool isTypeUnitScoped(DIType Ty) { - DIScope Parent = Ty.getContext(); +static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) { + DIScope Parent = DD->resolve(Ty.getContext()); while (Parent) { // Don't generate a hash for anything scoped inside a function. if (Parent.isSubprogram()) return false; - Parent = Parent.getContext(); + Parent = DD->resolve(Parent.getContext()); } return true; } /// Return true if the type should be split out into a type unit. -static bool shouldCreateTypeUnit(DICompositeType CTy) { - unsigned Tag = CTy.getTag(); +static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) { + uint16_t Tag = CTy.getTag(); switch (Tag) { case dwarf::DW_TAG_structure_type: @@ -904,13 +1117,11 @@ static bool shouldCreateTypeUnit(DICompositeType CTy) { case dwarf::DW_TAG_enumeration_type: case dwarf::DW_TAG_class_type: // If this is a class, structure, union, or enumeration type - // that is not a declaration, is a type definition, and not scoped + // that is a definition (not a declaration), and not scoped // inside a function then separate this out as a type unit. - if (CTy.isForwardDecl() || !isTypeUnitScoped(CTy)) - return 0; - return 1; + return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD); default: - return 0; + return false; } } @@ -920,69 +1131,47 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); switch (Tag) { case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); + constructArrayTypeDIE(Buffer, CTy); break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. 
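The two predicates above compose into a single eligibility test for type units: the tag must be a class, struct, union, or enum; the type must be a definition rather than a forward declaration; and no enclosing scope may be a subprogram. Condensed into one standalone function (the types and the scope walk are simplified stand-ins):

enum Tag { StructureType, UnionType, EnumerationType, ClassType, OtherTag };

struct TypeNode {
  Tag T;
  bool IsForwardDecl;
  bool IsSubprogram;
  TypeNode *Context; // enclosing scope, null at the top level
};

bool shouldCreateTypeUnit(const TypeNode &Ty) {
  switch (Ty.T) {
  case StructureType:
  case UnionType:
  case EnumerationType:
  case ClassType:
    break;
  default:
    return false; // only named aggregate tags qualify
  }
  if (Ty.IsForwardDecl)
    return false; // declarations never get their own type unit
  for (TypeNode *P = Ty.Context; P; P = P->Context)
    if (P->IsSubprogram)
      return false; // function-local types stay in the CU
  return true;
}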
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - DIType DTy = CTy.getTypeDerivedFrom(); - if (DTy.isType()) { - addType(&Buffer, DTy); - addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1); - } - } + case dwarf::DW_TAG_enumeration_type: + constructEnumTypeDIE(Buffer, CTy); break; case dwarf::DW_TAG_subroutine_type: { - // Add return type. + // Add return type. A void return won't have a type. DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); + DIType RTy(Elements.getElement(0)); + if (RTy) + addType(&Buffer, RTy); bool isPrototyped = true; // Add arguments. for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Ty = Elements.getElement(i); if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); isPrototyped = false; } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, DIType(Ty)); if (DIType(Ty).isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); - Buffer.addChild(Arg); } } // Add prototype flag if we're dealing with a C language and the // function has been prototyped. + uint16_t Language = getLanguage(); if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(&Buffer, dwarf::DW_AT_prototyped); - } - break; + } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { - if (CTy.isForwardDecl()) - break; - // Add elements to structure type. 
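The DW_AT_prototyped test in the subroutine-type case above depends on two things: the function was actually declared with a prototype, and the unit's language is a C dialect where that distinction exists. As a tiny check (0x0001, 0x000c, and 0x0010 are the standard DW_LANG_C89/C99/ObjC codes):

#include <cstdint>

bool wantsPrototypedFlag(uint16_t Language, bool IsPrototyped) {
  return IsPrototyped &&
         (Language == 0x0001 ||  // DW_LANG_C89
          Language == 0x000c ||  // DW_LANG_C99
          Language == 0x0010);   // DW_LANG_ObjC
}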
DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { @@ -990,7 +1179,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *ElemDie = NULL; if (Element.isSubprogram()) { DISubprogram SP(Element); - ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + ElemDie = getOrCreateSubprogramDIE(SP); if (SP.isProtected()) addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); @@ -999,21 +1188,23 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { dwarf::DW_ACCESS_private); else addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + dwarf::DW_ACCESS_public); if (SP.isExplicit()) addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = new DIE(dwarf::DW_TAG_friend); - addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) - ElemDie = createStaticMemberDIE(DDTy); - else - ElemDie = createMemberDIE(DDTy); + ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), + dwarf::DW_AT_friend); + } else if (DDTy.isStaticMember()) { + getOrCreateStaticMemberDIE(DDTy); + } else { + constructMemberDIE(Buffer, DDTy); + } } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); - ElemDie = new DIE(Property.getTag()); + ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); addType(ElemDie, Property.getType()); @@ -1038,8 +1229,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (Property.isNonAtomicObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; if (PropertyAttributes) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, - PropertyAttributes); + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + PropertyAttributes); DIEEntry *Entry = getDIEEntry(Element); if (!Entry) { @@ -1048,18 +1239,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } else continue; - Buffer.addChild(ElemDie); } if (CTy.isAppleBlockExtension()) addFlag(&Buffer, dwarf::DW_AT_APPLE_block); - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else - addToContextOwner(&Buffer, CTy.getContext()); + DICompositeType ContainingType(resolve(CTy.getContainingType())); + if (ContainingType) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(ContainingType)); if (CTy.isObjcClassComplete()) addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); @@ -1067,8 +1255,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. 
if (Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || - Tag == dwarf::DW_TAG_union_type) + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -1082,16 +1269,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || - Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) // TODO: Do we care about size for enum forward declarations? if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy.isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. if (CTy.isForwardDecl()) @@ -1104,131 +1290,126 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // No harm in adding the runtime language to the declaration. unsigned RLang = CTy.getRunTimeLang(); if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + RLang); } // If this is a type applicable to a type unit it then add it to the // list of types we'll compute a hash for later. - if (shouldCreateTypeUnit(CTy)) + if (shouldCreateTypeUnit(CTy, DD)) DD->addTypeUnitType(&Buffer); } -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - DIE *ParamDIE = getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); +/// constructTemplateTypeParameterDIE - Construct new DIE for the given +/// DITemplateTypeParameter. +void +CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP) { + DIE *ParamDIE = + createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); + // Add the type if it exists, it could be void and therefore no type. + if (TP.getType()) + addType(ParamDIE, resolve(TP.getType())); if (!TP.getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); - return ParamDIE; -} - -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. -DIE * -CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ - DIE *ParamDIE = getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(TPV.getTag()); - addType(ParamDIE, TPV.getType()); - if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); - if (Value *Val = TPV.getValue()) { +} + +/// constructTemplateValueParameterDIE - Construct new DIE for the given +/// DITemplateValueParameter. +void +CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter VP) { + DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer); + + // Add the type if there is one, template template and template parameter + // packs will not have a type. 
+ if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP.getType())); + if (!VP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); + if (Value *Val = VP.getValue()) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, TPV.getType().isUnsignedDIType()); + addConstantValue(ParamDIE, CI, + isUnsignedDIType(DD, resolve(VP.getType()))); else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->Mang->getSymbol(GV)); + addOpAddress(Block, Asm->getSymbol(GV)); // Emit DW_OP_stack_value to use the address as the immediate value of the // parameter, rather than a pointer to it. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); - addBlock(ParamDIE, dwarf::DW_AT_location, 0, Block); - } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Block); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, cast<MDString>(Val)->getString()); - } else if (TPV.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { assert(isa<MDNode>(Val)); DIArray A(cast<MDNode>(Val)); addTemplateParams(*ParamDIE, A); } } - - return ParamDIE; } /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *NDie = getDIE(NS); if (NDie) return NDie; - NDie = new DIE(dwarf::DW_TAG_namespace); - insertDIE(NS, NDie); + NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + if (!NS.getName().empty()) { addString(NDie, dwarf::DW_AT_name, NS.getName()); addAccelNamespace(NS.getName(), NDie); + addGlobalName(NS.getName(), NDie, NS.getContext()); } else addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); - addToContextOwner(NDie, NS.getContext()); return NDie; } /// getOrCreateSubprogramDIE - Create new DIE using SP. DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE (as is the case for member function + // declarations). + DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIE *SPDie = getDIE(SP); if (SPDie) return SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Add subprogram definitions to the CU die directly. + ContextDIE = CUDie.get(); // DW_TAG_inlined_subroutine may refer to this DIE. - insertDIE(SP, SPDie); + SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - DISubprogram SPDecl = SP.getFunctionDeclaration(); DIE *DeclDie = NULL; - if (SPDecl.isSubprogram()) { + if (SPDecl.isSubprogram()) DeclDie = getOrCreateSubprogramDIE(SPDecl); - } - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); // Add function template parameters. 
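For a non-type template parameter bound to a global value, the block built above encodes "the address is itself the value": DW_OP_addr followed by DW_OP_stack_value, with no final dereference. A byte-level sketch of that expression, assuming the plain DW_OP_addr path rather than the split-dwarf address-index path (the opcode values are the standard DWARF encodings):

#include <cstdint>
#include <vector>

std::vector<uint8_t> templateParamLocation(uint64_t SymAddr) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x03);                 // DW_OP_addr
  for (int i = 0; i < 8; ++i)           // 8-byte little-endian address
    Expr.push_back(uint8_t(SymAddr >> (8 * i)));
  Expr.push_back(0x9f);                 // DW_OP_stack_value
  return Expr;
}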
addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here instead of below the - // AT_specification code in order to work around a bug in older - // gdbs that requires the linkage name to resolve multiple template - // functions. - // TODO: Remove this set of code when we get rid of the old gdb - // compatibility. - StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty() && DD->useDarwinGDBCompat()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); - // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. if (DeclDie) { // Refer function declaration directly. - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - DeclDie); + addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie); return SPDie; } // Add the linkage name if we have one. - if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); @@ -1240,29 +1421,31 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add the prototype if we have a prototype and we have a C like // language. + uint16_t Language = getLanguage(); if (SP.isPrototyped() && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - // Add Return Type. DICompositeType SPTy = SP.getType(); assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); DIArray Args = SPTy.getTypeArray(); - addType(SPDie, DIType(Args.getElement(0))); + // Add a return type. If this is a type like a C/C++ void type we don't add a + // return type. + if (Args.getElement(0)) + addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); - ContainingTypeMap.insert(std::make_pair(SPDie, - SP.getContainingType())); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + ContainingTypeMap.insert( + std::make_pair(SPDie, resolve(SP.getContainingType()))); } if (!SP.isDefinition()) { @@ -1270,13 +1453,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); addType(Arg, ATy); if (ATy.isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); - SPDie->addChild(Arg); } } @@ -1324,16 +1506,16 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// createGlobalVariableDIE - create global variable DIE. 
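The vtable-slot expression built for virtual functions above is just DW_OP_constu followed by the virtual index; inside a DIE block the DW_FORM_udata operand becomes a ULEB128. A minimal encoder for exactly that expression (standard opcode value; illustrative, not the LLVM emitter):

#include <cstdint>
#include <vector>

static void appendULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (V);
}

std::vector<uint8_t> vtableElemLocation(uint64_t VirtualIndex) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x10); // DW_OP_constu
  appendULEB128(Expr, VirtualIndex);
  return Expr;
}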
-void CompileUnit::createGlobalVariableDIE(const MDNode *N) { +void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { + // Check for pre-existence. - if (getDIE(N)) + if (getDIE(GV)) return; - DIGlobalVariable GV(N); if (!GV.isGlobalVariable()) return; - DIDescriptor GVContext = GV.getContext(); + DIScope GVContext = GV.getContext(); DIType GTy = GV.getType(); // If this is a static data member definition, some attributes belong @@ -1344,35 +1526,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (SDMDecl.Verify()) { assert(SDMDecl.isStaticMember() && "Expected static member decl"); // We need the declaration DIE that is in the static member's class. - // But that class might not exist in the DWARF yet. - // Creating the class will create the static member decl DIE. - getOrCreateContextDIE(SDMDecl.getContext()); - VariableDIE = getDIE(SDMDecl); - assert(VariableDIE && "Static member decl has no context?"); + VariableDIE = getOrCreateStaticMemberDIE(SDMDecl); IsStaticMember = true; } // If this is not a static data member definition, create the variable // DIE and add the initial set of attributes to it. if (!VariableDIE) { - VariableDIE = new DIE(GV.getTag()); + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + // Add to map. - insertDIE(N, VariableDIE); + VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV); // Add name and type. addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addFlag(VariableDIE, dwarf::DW_AT_external); - addGlobalName(GV.getName(), VariableDIE); - } // Add line number info. addSourceLine(VariableDIE, GV); - // Add to context owner. - addToContextOwner(VariableDIE, GVContext); } // Add location. @@ -1382,7 +1559,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - const MCSymbol *Sym = Asm->Mang->getSymbol(GV.getGlobal()); + const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); if (GV.getGlobal()->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. unsigned PointerSize = Asm->getDataLayout().getPointerSize(); @@ -1393,68 +1570,62 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { // 1) Start with a constNu of the appropriate pointer size - addUInt(Block, 0, dwarf::DW_FORM_data1, + addUInt(Block, dwarf::DW_FORM_data1, PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); - // 2) containing the (relocated) address of the TLS variable - addExpr(Block, 0, dwarf::DW_FORM_udata, Expr); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(Block, dwarf::DW_FORM_udata, Expr); } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(Block, 0, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); } - // 3) followed by a custom OP to tell the debugger about TLS (presumably) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_lo_user); + // 3) followed by a custom OP to make the debugger do a TLS lookup. 
+ addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); } else addOpAddress(Block, Sym); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !isSubprogramContext(GVContext)) { + !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) addFlag(VariableDIE, dwarf::DW_AT_declaration); - addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } - // Add linkage name. + // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) { + if (!LinkageName.empty()) // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? - VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE + : VariableDIE, + dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); - // In compatibility mode with older gdbs we put the linkage name on both - // the TAG_variable DIE and on the TAG_member DIE. - if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat()) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); - } } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV.getConstant())) { // AT_const_value was added when the static member was created. To avoid // emitting AT_const_value multiple times, we only add AT_const_value when // it is not a static member. if (!IsStaticMember) - addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy)); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { addToAccelTable = true; // GV is a merged global. 
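The TLS location built near the top of this hunk is a three-part expression: a const4u/const8u holding the variable's offset within the module's TLS block, then DW_OP_GNU_push_tls_address to make the debugger do the TLS lookup (under -gsplit-dwarf the constant is replaced by DW_OP_GNU_const_index into the address pool). A sketch of the non-split byte layout (standard opcode values; illustrative only):

#include <cstdint>
#include <vector>

std::vector<uint8_t> tlsLocation(uint64_t Offset, unsigned PointerSize) {
  std::vector<uint8_t> Expr;
  Expr.push_back(PointerSize == 4 ? 0x0c   // DW_OP_const4u
                                  : 0x0e); // DW_OP_const8u
  for (unsigned i = 0; i < PointerSize; ++i) // little-endian constant
    Expr.push_back(uint8_t(Offset >> (8 * i)));
  Expr.push_back(0xe0); // DW_OP_GNU_push_tls_address
  return Expr;
}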
DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); - addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr))); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end()); - addUInt(Block, 0, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr))); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(Block, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } if (addToAccelTable) { @@ -1466,13 +1637,17 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) addAccelName(GV.getLinkageName(), AddrDIE); } + + if (!GV.isLocalToUnit()) + addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE, + GV.getContext()); } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy); // The LowerBound value defines the lower bounds which is typically zero for // C/C++. The Count value is the number of elements. Values are 64 bit. If @@ -1485,26 +1660,22 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, int64_t Count = SR.getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); if (Count != -1 && Count != 0) // FIXME: An unbounded array should reference the expression that defines // the array. - addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1); - - Buffer.addChild(DW_Subrange); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None, + LowerBound + Count - 1); } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void CompileUnit::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->isVector()) +void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { + if (CTy.isVector()) addFlag(&Buffer, dwarf::DW_AT_GNU_vector); - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); + // Emit the element type. + addType(&Buffer, resolve(CTy.getTypeDerivedFrom())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1512,16 +1683,16 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. 
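For a global folded into a merged blob, the location built above is "base symbol plus constant byte offset": DW_OP_addr, DW_OP_constu with the offset computed by getIndexedOffset, then DW_OP_plus. The same expression, byte for byte (standard opcodes; not the LLVM emitter):

#include <cstdint>
#include <vector>

static void uleb(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V)
      B |= 0x80;
    Out.push_back(B);
  } while (V);
}

std::vector<uint8_t> mergedGlobalLocation(uint64_t BaseAddr, uint64_t ByteOff) {
  std::vector<uint8_t> Expr;
  Expr.push_back(0x03);                 // DW_OP_addr
  for (int i = 0; i < 8; ++i)           // 8-byte little-endian base address
    Expr.push_back(uint8_t(BaseAddr >> (8 * i)));
  Expr.push_back(0x10);                 // DW_OP_constu
  uleb(Expr, ByteOff);
  Expr.push_back(0x22);                 // DW_OP_plus
  return Expr;
}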
-    IdxTy = new DIE(dwarf::DW_TAG_base_type);
+    IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get());
     addString(IdxTy, dwarf::DW_AT_name, "int");
-    addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+    addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t));
     addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
             dwarf::DW_ATE_signed);
-    addDie(IdxTy);
     setIndexTyDie(IdxTy);
   }
 
   // Add subranges to array type.
+  DIArray Elements = CTy.getTypeArray();
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Elements.getElement(i);
     if (Element.getTag() == dwarf::DW_TAG_subrange_type)
@@ -1529,168 +1700,180 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
   }
 }
 
-/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
-  DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
-  StringRef Name = ETy.getName();
-  addString(Enumerator, dwarf::DW_AT_name, Name);
-  int64_t Value = ETy.getEnumValue();
-  addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
-  return Enumerator;
+/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
+void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  DIArray Elements = CTy.getTypeArray();
+
+  // Add enumerators to enumeration type.
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    DIEnumerator Enum(Elements.getElement(i));
+    if (Enum.isEnumerator()) {
+      DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+      StringRef Name = Enum.getName();
+      addString(Enumerator, dwarf::DW_AT_name, Name);
+      int64_t Value = Enum.getEnumValue();
+      addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+    }
+  }
+  DIType DTy = resolve(CTy.getTypeDerivedFrom());
+  if (DTy) {
+    addType(&Buffer, DTy);
+    addFlag(&Buffer, dwarf::DW_AT_enum_class);
+  }
 }
 
 /// constructContainingTypeDIEs - Construct DIEs for types that contain
 /// vtables.
 void CompileUnit::constructContainingTypeDIEs() {
   for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
-         CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+           CE = ContainingTypeMap.end();
+       CI != CE; ++CI) {
     DIE *SPDie = CI->first;
-    const MDNode *N = CI->second;
-    if (!N) continue;
-    DIE *NDie = getDIE(N);
-    if (!NDie) continue;
-    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+    DIDescriptor D(CI->second);
+    if (!D)
+      continue;
+    DIE *NDie = getDIE(D);
+    if (!NDie)
+      continue;
+    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie);
   }
 }
 
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *CompileUnit::constructVariableDIE(DbgVariable *DV,
-                                       bool isScopeAbstract) {
-  StringRef Name = DV->getName();
-
-  // Translate tag to proper Dwarf tag.
-  unsigned Tag = DV->getTag();
+DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
+  StringRef Name = DV.getName();
 
   // Define variable debug information entry.
-  DIE *VariableDie = new DIE(Tag);
+  DIE *VariableDie = new DIE(DV.getTag());
   DbgVariable *AbsVar = DV.getAbstractVariable();
   DIE *AbsDIE = AbsVar ?
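The bound logic in constructSubrangeDIE, used by the subrange loop above, is easy to miss in the diff: the lower bound is emitted only when it differs from the language's default, and the upper bound is LowerBound + Count - 1, skipped entirely for the unknown counts -1 and 0. A small runnable restatement:

#include <cstdint>
#include <cstdio>

void subrangeBounds(int64_t Lower, int64_t Count, int64_t DefaultLower) {
  if (DefaultLower == -1 || Lower != DefaultLower)
    std::printf("DW_AT_lower_bound = %lld\n", (long long)Lower);
  if (Count != -1 && Count != 0)
    std::printf("DW_AT_upper_bound = %lld\n", (long long)(Lower + Count - 1));
}

int main() {
  // C's "int a[10]": default lower bound 0, so only the upper bound (9)
  // is emitted.
  subrangeBounds(0, 10, 0);
}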
AbsVar->getDIE() : NULL; if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, DV->getVariable()); - addType(VariableDie, DV->getType()); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, DV.getVariable()); + addType(VariableDie, DV.getType()); } - if (DV->isArtificial()) + if (DV.isArtificial()) addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } // Add variable address. - unsigned Offset = DV->getDotDebugLocOffset(); + unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + addLabel(VariableDie, dwarf::DW_AT_location, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, Asm->GetTempSymbol("debug_loc", Offset)); - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } // Check if variable is described by a DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV->getMInsn()) { + if (const MachineInstr *DVInsn = DV.getMInsn()) { assert(DVInsn->getNumOperands() == 3); if (DVInsn->getOperand(0).isReg()) { const MachineOperand RegOp = DVInsn->getOperand(0); // If the second operand is an immediate, this is an indirect value. if (DVInsn->getOperand(1).isImm()) { - MachineLocation Location(RegOp.getReg(), DVInsn->getOperand(1).getImm()); - addVariableAddress(*DV, VariableDie, Location); + MachineLocation Location(RegOp.getReg(), + DVInsn->getOperand(1).getImm()); + addVariableAddress(DV, VariableDie, Location); } else if (RegOp.getReg()) - addVariableAddress(*DV, VariableDie, MachineLocation(RegOp.getReg())); + addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); } else if (DVInsn->getOperand(0).isImm()) - addConstantValue(VariableDie, DVInsn->getOperand(0), DV->getType()); + addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType()); else if (DVInsn->getOperand(0).isFPImm()) addConstantFPValue(VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isCImm()) addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), - DV->getType().isUnsignedDIType()); + isUnsignedDIType(DD, DV.getType())); - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } else { // .. else use frame index. - int FI = DV->getFrameIndex(); + int FI = DV.getFrameIndex(); if (FI != ~0) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); - addVariableAddress(*DV, VariableDie, Location); + addVariableAddress(DV, VariableDie, Location); } } - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } -/// createMemberDIE - Create new member DIE. -DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); +/// constructMemberDIE - Construct member DIE from DIDerivedType. 
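One detail in the variable DIE above: the reference into .debug_loc switches form by DWARF version, DW_FORM_sec_offset from version 4 on and DW_FORM_data4 before that. Reduced to its decision:

enum Form { Data4, SecOffset };

Form locListForm(unsigned DwarfVersion) {
  return DwarfVersion >= 4 ? SecOffset : Data4;
}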
+void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { + DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, DT.getTypeDerivedFrom()); + addType(MemberDie, resolve(DT.getTypeDerivedFrom())); addSourceLine(MemberDie, DT); DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getDataLayout().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); + addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { + if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. // BaseAddr = ObAddr + *((*ObAddr) - Offset) DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); + } else { + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = getBaseTypeSize(DD, DT); + uint64_t OffsetInBytes; + + if (Size != FieldSize) { + // Handle bitfield. 
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, None, + getBaseTypeSize(DD, DT) >> 3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + + // Here WD_AT_data_member_location points to the anonymous + // field that includes this bit field. + OffsetInBytes = FieldOffset >> 3; + } else + // This is not a bitfield. + OffsetInBytes = DT.getOffsetInBits() >> 3; + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, OffsetInBytes); + } if (DT.isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, @@ -1714,17 +1897,26 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); - - return MemberDie; } -/// createStaticMemberDIE - Create new DIE for C++ static member. -DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { +/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. +DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) return NULL; - DIE *StaticMemberDIE = new DIE(DT.getTag()); - DIType Ty = DT.getTypeDerivedFrom(); + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + assert(dwarf::isType(ContextDIE->getTag()) && + "Static member should belong to a type."); + + DIE *StaticMemberDIE = getDIE(DT); + if (StaticMemberDIE) + return StaticMemberDIE; + + StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + + DIType Ty = resolve(DT.getTypeDerivedFrom()); addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); addType(StaticMemberDIE, Ty); @@ -1745,10 +1937,20 @@ DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { dwarf::DW_ACCESS_public); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) - addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty)); if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) addConstantFPValue(StaticMemberDIE, CFP); - insertDIE(DT, StaticMemberDIE); return StaticMemberDIE; } + +void CompileUnit::emitHeader(const MCSection *ASection, + const MCSymbol *ASectionSym) { + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(DD->getDwarfVersion()); + Asm->OutStreamer.AddComment("Offset Into Abbrev. 
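The bitfield arithmetic relocated above is worth a worked example. DW_AT_bit_offset measures from the most significant bit of the storage unit, so on little-endian targets the declared offset has to be flipped. Pulled out into a standalone function (same math as the diff, hypothetical names):

#include <cstdint>
#include <cstdio>

uint64_t dwarfBitOffset(uint64_t Offset, uint64_t Size, uint64_t FieldSize,
                        uint64_t AlignInBits, bool LittleEndian) {
  uint64_t AlignMask = ~(AlignInBits - 1);
  uint64_t HiMark = (Offset + FieldSize) & AlignMask;
  uint64_t FieldOffset = HiMark - FieldSize; // start of the storage unit
  Offset -= FieldOffset;                     // offset within that unit
  if (LittleEndian)
    Offset = FieldSize - (Offset + Size);
  return Offset;
}

int main() {
  // struct { unsigned a : 3; unsigned b : 5; }: for 'b', Offset=3, Size=5,
  // FieldSize=32, Align=32 -> DW_AT_bit_offset 24 on little-endian.
  std::printf("%llu\n",
              (unsigned long long)dwarfBitOffset(3, 5, 32, 32, true));
}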
Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), + ASectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 3908b37..d782c88 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,7 +15,9 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo.h" @@ -23,8 +25,6 @@ namespace llvm { -class DwarfDebug; -class DwarfUnits; class MachineLocation; class MachineOperand; class ConstantInt; @@ -39,11 +39,10 @@ class CompileUnit { /// unsigned UniqueID; - /// Language - The DW_AT_language of the compile unit - /// - unsigned Language; + /// Node - MDNode for the compile unit. + DICompileUnit Node; - /// Die - Compile unit debug information entry. + /// CUDie - Compile unit debug information entry. /// const OwningPtr<DIE> CUDie; @@ -67,18 +66,18 @@ class CompileUnit { /// GlobalNames - A map of globally visible named entities for this unit. /// - StringMap<DIE*> GlobalNames; + StringMap<DIE *> GlobalNames; /// GlobalTypes - A map of globally visible types for this unit. /// - StringMap<DIE*> GlobalTypes; + StringMap<DIE *> GlobalTypes; /// AccelNames - A map of names for the name accelerator table. /// - StringMap<std::vector<DIE*> > AccelNames; - StringMap<std::vector<DIE*> > AccelObjC; - StringMap<std::vector<DIE*> > AccelNamespace; - StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes; + StringMap<std::vector<DIE *> > AccelNames; + StringMap<std::vector<DIE *> > AccelObjC; + StringMap<std::vector<DIE *> > AccelNamespace; + StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -88,165 +87,154 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - /// Offset of the CUDie from beginning of debug info section. - unsigned DebugInfoOffset; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. - int64_t getDefaultLowerBound() const; + // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + DIEInteger *DIEIntegerOne; public: - CompileUnit(unsigned UID, unsigned L, DIE *D, const MDNode *N, AsmPrinter *A, + CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU); ~CompileUnit(); // Accessors. 
- unsigned getUniqueID() const { return UniqueID; } - unsigned getLanguage() const { return Language; } - DIE* getCUDie() const { return CUDie.get(); } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - const StringMap<std::vector<DIE*> > &getAccelNames() const { + unsigned getUniqueID() const { return UniqueID; } + uint16_t getLanguage() const { return Node.getLanguage(); } + DICompileUnit getNode() const { return Node; } + DIE *getCUDie() const { return CUDie.get(); } + const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; } + + const StringMap<std::vector<DIE *> > &getAccelNames() const { return AccelNames; } - const StringMap<std::vector<DIE*> > &getAccelObjC() const { + const StringMap<std::vector<DIE *> > &getAccelObjC() const { return AccelObjC; } - const StringMap<std::vector<DIE*> > &getAccelNamespace() const { + const StringMap<std::vector<DIE *> > &getAccelNamespace() const { return AccelNamespace; } - const StringMap<std::vector<std::pair<DIE*, unsigned > > > - &getAccelTypes() const { + const StringMap<std::vector<std::pair<DIE *, unsigned> > > & + getAccelTypes() const { return AccelTypes; } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } + /// getParentContextString - Get a string containing the language specific + /// context for a global name. + std::string getParentContextString(DIScope Context) const; + /// addGlobalName - Add a new global entity to the compile unit. /// - void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; } + void addGlobalName(StringRef Name, DIE *Die, DIScope Context); /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); + /// addPubTypes - Add a set of types from the subprogram to the global types. + void addPubTypes(DISubprogram SP); /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNames[Name]; - DIEs.push_back(Die); - } - void addAccelObjC(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelObjC[Name]; - DIEs.push_back(Die); - } - void addAccelNamespace(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNamespace[Name]; - DIEs.push_back(Die); - } - void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { - std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; - DIEs.push_back(Die); - } + void addAccelName(StringRef Name, DIE *Die); - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) const { return MDNodeToDieMap.lookup(N); } + /// addAccelObjC - Add a new name to the ObjC accelerator table. + void addAccelObjC(StringRef Name, DIE *Die); - DIEBlock *getDIEBlock() { - return new (DIEValueAllocator) DIEBlock(); - } + /// addAccelNamespace - Add a new name to the namespace accelerator table. + void addAccelNamespace(StringRef Name, DIE *Die); - /// insertDIE - Insert DIE into the map. 
- void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } + /// addAccelType - Add a new type to the type accelerator table. + void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die); - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) const { - return MDNodeToDIEEntryMap.lookup(N); - } + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + DIE *getDIE(DIDescriptor D) const; - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } + DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } + + /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + void insertDIE(DIDescriptor Desc, DIE *D); /// addDie - Adds or interns the DIE to the compile unit. /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } + void addDie(DIE *Buffer) { CUDie->addChild(Buffer); } /// addFlag - Add a flag that is true to the DIE. - void addFlag(DIE *Die, unsigned Attribute); + void addFlag(DIE *Die, dwarf::Attribute Attribute); /// addUInt - Add an unsigned integer attribute data and value. /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + uint64_t Integer); + + void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer); /// addSInt - Add an signed integer attribute data and value. /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + int64_t Integer); + + void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer); /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, const StringRef Str); + void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); /// addLocalString - Add a string attribute data and value. /// - void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); /// addExpr - Add a Dwarf expression attribute data and value. /// - void addExpr(DIE *Die, unsigned Attribute, unsigned Form, - const MCExpr *Expr); + void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr); /// addLabel - Add a Dwarf label attribute data and value. 
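The new Optional<dwarf::Form> parameters are what let call sites in the .cpp hunks pass None for byte sizes and offsets: when no form is given, the unit picks a form that fits the value. A sketch of that fallback, on the assumption that the selection is a simple smallest-fit (enumerators are stand-ins for the dwarf:: forms):

#include <cstdint>

enum Form { Data1, Data2, Data4, Data8 };

Form bestUnsignedForm(uint64_t V) {
  if (V <= 0xffull)
    return Data1;
  if (V <= 0xffffull)
    return Data2;
  if (V <= 0xffffffffull)
    return Data4;
  return Data8;
}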
/// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); + void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label); + /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addOpAddress(DIE *Die, const MCSymbol *Label); - void addOpAddress(DIE *Die, const MCSymbolRefExpr *Label); + void addOpAddress(DIEBlock *Die, const MCSymbol *Label); /// addDelta - Add a label delta attribute data and value. /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); + void addDelta(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Hi, + const MCSymbol *Lo); /// addDIEEntry - Add a DIE attribute data and value. /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); + + /// addDIEEntry - Add a DIE attribute data and value. + /// + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); /// addBlock - Add block data. /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); /// addSourceLine - Add location information to specified debug information /// entry. @@ -259,8 +247,8 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location, bool Indirect = false); + void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, + bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); @@ -275,17 +263,17 @@ public: void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIE *TheDie, unsigned Reg); + void addRegisterOp(DIEBlock *TheDie, unsigned Reg); /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void addComplexAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -295,7 +283,7 @@ public: /// starting location. Add the DWARF information to the die. Obsolete, /// please use addComplexAddress instead. 
/// - void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a @@ -303,13 +291,10 @@ public: void addVariableAddress(const DbgVariable &DV, DIE *Die, MachineLocation Location); - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); - /// addType - Add a new type attribute to the specified entity. This takes /// an attribute parameter because DW_AT_friend attributes are also /// type references. - void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); + void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); @@ -321,66 +306,103 @@ public: /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. - DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIScope Context); - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create - /// new DIE for the given DITemplateValueParameter. - DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(DIGlobalVariable GV); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); + /// constructContainingTypeDIEs - Construct DIEs for types that contain + /// vtables. + void constructContainingTypeDIEs(); - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(const MDNode *N); + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if N is not null. + DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); + + /// Compute the size of a header for this unit, not including the initial + /// length field. + unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + } - void addPubTypes(DISubprogram SP); + /// Emit the header for this unit, not including the initial length field. + void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym); +private: /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); + void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructEnumTypeDIE - Construct enum type DIE from DICompositeType. - DIE *constructEnumTypeDIE(DIEnumerator ETy); + void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. - void constructContainingTypeDIEs(); + /// constructMemberDIE - Construct member DIE from DIDerivedType. + void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract); + /// constructTemplateTypeParameterDIE - Construct new DIE for the given + /// DITemplateTypeParameter. + void constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP); - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); + /// constructTemplateValueParameterDIE - Construct new DIE for the given + /// DITemplateValueParameter. + void constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter TVP); - /// createStaticMemberDIE - Create new static data member DIE. - DIE *createStaticMemberDIE(DIDerivedType DT); + /// getOrCreateStaticMemberDIE - Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIDescriptor Context); + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; -private: + /// getDefaultLowerBound - Return the default lower bound for an array. If the + /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - DIEInteger *DIEIntegerOne; + /// getDIEEntry - Returns the debug information entry for the specified + /// debug variable. + DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { return IndexTyDie; } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference.
+ template <typename T> T resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); + } }; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 979c0c3..24e2c05 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DIEHash.h" #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/ADT/STLExtras.h" @@ -34,6 +35,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MD5.h" @@ -57,15 +59,20 @@ static cl::opt<bool> UnknownLocations( cl::init(false)); static cl::opt<bool> -GenerateDwarfPubNamesSection("generate-dwarf-pubnames", cl::Hidden, - cl::init(false), - cl::desc("Generate DWARF pubnames section")); - -static cl::opt<bool> GenerateODRHash("generate-odr-hash", cl::Hidden, cl::desc("Add an ODR hash to external type DIEs."), cl::init(false)); +static cl::opt<bool> +GenerateCUHash("generate-cu-hash", cl::Hidden, + cl::desc("Add the CU hash as the dwo_id."), + cl::init(false)); + +static cl::opt<bool> +GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate GNU-style pubnames and pubtypes"), + cl::init(false)); + namespace { enum DefaultOnOff { Default, @@ -83,14 +90,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::init(Default)); static cl::opt<DefaultOnOff> -DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, - cl::desc("Compatibility with Darwin gdb."), - cl::values(clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, cl::desc("Output prototype dwarf split debug info."), cl::values(clEnumVal(Default, "Default for platform"), @@ -98,16 +97,16 @@ SplitDwarf("split-dwarf", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -namespace { - const char *const DWARFGroupName = "DWARF Emission"; - const char *const DbgTimerName = "DWARF Debug Writer"; +static cl::opt<DefaultOnOff> +DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate DWARF pubnames and pubtypes sections"), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); - struct CompareFirst { - template <typename T> bool operator()(const T &lhs, const T &rhs) const { - return lhs.first < rhs.first; - } - }; -} // end anonymous namespace +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// @@ -117,6 +116,13 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { +/// resolve - Look in the DwarfDebug map for the MDNode that +/// corresponds to the reference. 
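/// (Illustrative sketch only, using hypothetical names rather than the LLVM
/// API: the pattern is that a type reference is either a direct node or a
/// string identifier, and a single shared owner resolves the identifiers so
/// that type DIEs can be shared across CUs.)
/// \code
///   #include <cassert>
///   #include <map>
///   #include <string>
///
///   struct TypeNode { std::string Name; };
///
///   // A reference is either already resolved or names a type by identifier.
///   struct TypeRef {
///     const TypeNode *Direct = nullptr;
///     std::string Identifier; // consulted only when Direct is null
///   };
///
///   class DebugInfoOwner {
///     std::map<std::string, const TypeNode *> IdentifierMap;
///   public:
///     void add(const std::string &Id, const TypeNode *N) {
///       IdentifierMap[Id] = N;
///     }
///     const TypeNode *resolve(const TypeRef &R) const {
///       if (R.Direct)
///         return R.Direct; // already concrete
///       auto It = IdentifierMap.find(R.Identifier); // shared, owner-kept map
///       assert(It != IdentifierMap.end() && "unresolved type reference");
///       return It->second;
///     }
///   };
/// \endcode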
+template <typename T> +T DbgVariable::resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); +} + DIType DbgVariable::getType() const { DIType Ty = Var.getType(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex @@ -147,21 +153,16 @@ DIType DbgVariable::getType() const { the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. */ DIType subType = Ty; - unsigned tag = Ty.getTag(); + uint16_t tag = Ty.getTag(); - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); + if (tag == dwarf::DW_TAG_pointer_type) + subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + DIArray Elements = DICompositeType(subType).getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Elements.getElement(i)); if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); + return (resolve(DT.getTypeDerivedFrom())); } } return Ty; @@ -182,10 +183,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) AbbreviationsSet(InitAbbreviationsSetSize), SourceIdMap(DIEValueAllocator), PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", + InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", DIEValueAllocator), SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", + SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", DIEValueAllocator) { DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; @@ -195,29 +196,24 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables and older gdb compatibility - // for Darwin. + // Turn on accelerator tables for Darwin by default, pubnames by + // default for non-Darwin, and handle split dwarf. bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); - if (DarwinGDBCompat == Default) { - if (IsDarwin) - IsDarwinGDBCompat = true; - else - IsDarwinGDBCompat = false; - } else - IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; - if (DwarfAccelTables == Default) { - if (IsDarwin) - HasDwarfAccelTables = true; - else - HasDwarfAccelTables = false; - } else - HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + if (DwarfAccelTables == Default) + HasDwarfAccelTables = IsDarwin; + else + HasDwarfAccelTables = DwarfAccelTables == Enable; if (SplitDwarf == Default) HasSplitDwarf = false; else - HasSplitDwarf = SplitDwarf == Enable ? true : false; + HasSplitDwarf = SplitDwarf == Enable; + + if (DwarfPubSections == Default) + HasDwarfPubSections = !IsDarwin; + else + HasDwarfPubSections = DwarfPubSections == Enable; DwarfVersion = getDwarfVersionFromModule(MMI->getModule()); @@ -226,8 +222,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) beginModule(); } } -DwarfDebug::~DwarfDebug() { -} // Switch to the specified MCSection and emit an assembler // temporary label to it if SymbolStem is specified. @@ -285,10 +279,10 @@ void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { // If it's newly added. if (InSet == &Abbrev) { // Add to abbreviation list. 
- Abbreviations->push_back(&Abbrev); + Abbreviations.push_back(&Abbrev); // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations->size()); + Abbrev.setNumber(Abbreviations.size()); } else { // Assign existing abbreviation number. Abbrev.setNumber(InSet->getNumber()); @@ -302,12 +296,7 @@ static bool isObjCClass(StringRef Name) { static bool hasObjCCategory(StringRef Name) { if (!isObjCClass(Name)) return false; - size_t pos = Name.find(')'); - if (pos != std::string::npos) { - if (Name[pos+1] != ' ') return false; - return true; - } - return false; + return Name.find(") ") != StringRef::npos; } static void getObjCClassCategory(StringRef In, StringRef &Class, @@ -327,11 +316,20 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } +// Helper for sorting sections into a stable output order. +static bool SectionSort(const MCSection *A, const MCSection *B) { + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; +} + // Add the various names to the Dwarf accelerator table names. +// TODO: Determine whether or not we should add names for programs +// that do not have a DW_AT_name or DW_AT_linkage_name field - this +// is only slightly different than the lookup of non-standard ObjC names. static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, DIE* Die) { if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output @@ -352,30 +350,34 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, } } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. +bool DwarfDebug::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(resolve(DIType(Context).getContext())); + return false; +} + // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, - const MDNode *SPNode) { - DIE *SPDie = SPCU->getDIE(SPNode); +DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { + DIE *SPDie = SPCU->getDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); - DISubprogram SP(SPNode); // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); - if (AbsSPDIE) { - bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); + if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { // Pick up abstract subprogram DIE. - SPDie = new DIE(dwarf::DW_TAG_subprogram); - // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of - // DW_FORM_ref4. - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - InSameCU ? 
dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, - AbsSPDIE); - SPCU->addDie(SPDie); + SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); if (!SPDecl.isSubprogram()) { @@ -384,32 +386,31 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // function then gdb prefers the definition at top level but does not // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { + DIScope SPContext = resolve(SP.getContext()); + if (SP.isDefinition() && !SPContext.isCompileUnit() && + !SPContext.isFile() && + !isSubprogramContext(SPContext)) { SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. DICompositeType SPTy = SP.getType(); DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); + DIE *Arg = + SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); SPCU->addType(Arg, ATy); if (ATy.isArtificial()) SPCU->addFlag(Arg, dwarf::DW_AT_artificial); if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, Arg); - SPDie->addChild(Arg); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); } DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, SPDeclDie); - SPCU->addDie(SPDie); + SPDie = + SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); } } } @@ -431,18 +432,39 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, return SPDie; } +/// Check whether we should create a DIE for the given Scope; return true +/// if we don't create a DIE (the corresponding DIE is null). +bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { + if (Scope->isAbstractScope()) + return false; + + // We don't create a DIE if there is no Range. + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return true; + + if (Ranges.size() > 1) + return false; + + // We don't create a DIE if we have a single Range and the end label + // is null. + SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + MCSymbol *End = getLabelAfterInsn(RI->second); + return !End; +} + // Construct new DW_TAG_lexical_block for this scope and attach // DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (isLexicalScopeDIENull(Scope)) + return 0; + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); - if (Ranges.empty()) - return 0; - // If we have multiple ranges, emit them into the range section. if (Ranges.size() > 1) { // .debug_range section has not been laid out yet.
Emit offset in @@ -467,8 +489,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); - - if (End == 0) return 0; + assert(End && "End label should not be null!"); assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); @@ -498,8 +519,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, } DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in @@ -535,26 +555,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, // Add the call site information to the DIE. DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), TheCU->getUniqueID())); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); - - // Track the start label for this inlined function. - //.debug_inlined section specification does not clearly state how - // to emit inlined scopes that are split into multiple instruction ranges. - // For now, use the first instruction range and emit low_pc/high_pc pair and - // corresponding the .debug_inlined section entry for this pair. - if (Asm->MAI->doesDwarfUseInlineInfoSection()) { - MCSymbol *StartLabel = getLabelBeforeInsn(Ranges.begin()->first); - InlineInfoMap::iterator I = InlineInfo.find(InlinedSP); - - if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); - InlinedSPNodes.push_back(InlinedSP); - } else - I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); - } + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -563,26 +567,16 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } -// Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - if (!Scope || !Scope->getScopeNode()) - return NULL; - - DIScope DS(Scope->getScopeNode()); - // Early return to avoid creating dangling variable|scope DIEs. - if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() && - !TheCU->getDIE(DS)) - return NULL; - - SmallVector<DIE *, 8> Children; - DIE *ObjectPointer = NULL; +DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE*> &Children) { + DIE *ObjectPointer = NULL; // Collect arguments for current function. 
if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); if (ArgDV->isObjectPointer()) ObjectPointer = Arg; } @@ -591,7 +585,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; } @@ -599,6 +593,23 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); + return ObjectPointer; +} + +// Construct a DIE for this scope. +DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (!Scope || !Scope->getScopeNode()) + return NULL; + + DIScope DS(Scope->getScopeNode()); + + SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = NULL; + bool ChildrenCreated = false; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); @@ -609,26 +620,41 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // Note down abstract DIE. if (ScopeDIE) AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); - } - else - ScopeDIE = updateSubprogramScopeDIE(TheCU, DS); - } - else { + } else + ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS)); + } else { + // Early exit when we know the scope DIE is going to be null. + if (isLexicalScopeDIENull(Scope)) + return NULL; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); + ChildrenCreated = true; + // There is no need to emit empty lexical block DIE. std::pair<ImportedEntityMap::const_iterator, ImportedEntityMap::const_iterator> Range = std::equal_range( ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), - CompareFirst()); + less_first()); if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); + assert(ScopeDIE && "Scope DIE should not be null."); for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) constructImportedEntityDIE(TheCU, i->second, ScopeDIE); } - if (!ScopeDIE) return NULL; + if (!ScopeDIE) { + assert(Children.empty() && + "We create children only when the scope DIE is not null."); + return NULL; + } + if (!ChildrenCreated) + // We create children when the scope DIE is not null. 
+ ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); // Add children for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), @@ -636,8 +662,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram() && ObjectPointer != NULL) - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, ObjectPointer); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); @@ -653,8 +678,10 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, StringRef DirName, unsigned CUID) { // If we use .loc in assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) CUID = 0; // If FE did not provide a file name, then assume stdin. @@ -689,14 +716,12 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, // Create new CompileUnit for the given metadata node with tag // DW_TAG_compile_unit. -CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { - DICompileUnit DIUnit(N); +CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, N, Asm, + CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, this, &InfoHolder); FileIDCUMap[NewCU->getUniqueID()] = 0; @@ -723,31 +748,57 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { // Use a single line table if we are using .loc and generating assembly. bool UseTheFirstCU = - (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) || - (NewCU->getUniqueID() == 0); + (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) || + (NewCU->getUniqueID() == 0); - // DW_AT_stmt_list is an offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. if (!useSplitDwarf()) { + // DW_AT_stmt_list is an offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - UseTheFirstCU ? - Asm->GetTempSymbol("section_line") : LineTableStartSym); + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, + UseTheFirstCU ?
Asm->GetTempSymbol("section_line") + : LineTableStartSym); else if (UseTheFirstCU) NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); else NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, LineTableStartSym, DwarfLineSectionSym); + + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + // Flags to let the linker know we have emitted new style pubnames. Only + // emit it here if we don't have a skeleton CU for split dwarf. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, + dwarf::DW_FORM_sec_offset, + Asm->GetTempSymbol("gnu_pubnames", + NewCU->getUniqueID())); + else + NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("gnu_pubnames", + NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, + dwarf::DW_FORM_sec_offset, + Asm->GetTempSymbol("gnu_pubtypes", + NewCU->getUniqueID())); + else + NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("gnu_pubtypes", + NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } } - // If we're using split dwarf the compilation dir is going to be in the - // skeleton CU and so we don't need to duplicate it here. - if (!useSplitDwarf() && !CompilationDir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); @@ -764,13 +815,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { InfoHolder.addUnit(NewCU); - CUMap.insert(std::make_pair(N, NewCU)); + CUMap.insert(std::make_pair(DIUnit, NewCU)); + CUDieMap.insert(std::make_pair(Die, NewCU)); return NewCU; } // Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, - const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + // FIXME: We should only call this routine once, however, during LTO if a + // program is defined in multiple CUs we could end up calling it out of + // beginModule as we walk the CUs. + CompileUnit *&CURef = SPMap[N]; if (CURef) return; @@ -784,15 +839,8 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); - // Add to map. - TheCU->insertDIE(N, SubprogramDie); - - // Add to context owner. - TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - - // Expose as global, if requested. - if (GenerateDwarfPubNamesSection) - TheCU->addGlobalName(SP.getName(), SubprogramDie); + // Expose as a global name. 
+ TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); } void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, @@ -833,10 +881,9 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), Module.getContext().getDirectory(), TheCU->getUniqueID()); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, - EntityDie); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber()); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie); StringRef Name = Module.getName(); if (!Name.empty()) TheCU->addString(IMDie, dwarf::DW_AT_name, Name); @@ -857,6 +904,7 @@ void DwarfDebug::beginModule() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; + TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); // Emit initial sections so we can reference labels later. emitSectionLabels(); @@ -870,10 +918,10 @@ void DwarfDebug::beginModule() { DIImportedEntity(ImportedEntities.getElement(i)).getContext(), ImportedEntities.getElement(i))); std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), CompareFirst()); + ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(GVs.getElement(i)); + CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) constructSubprogramDIE(CU, SPs.getElement(i)); @@ -887,22 +935,13 @@ void DwarfDebug::beginModule() { // available. for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) constructImportedEntityDIE(CU, ImportedEntities.getElement(i)); - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. - if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, 0); - // Now construct the skeleton CU associated. - constructSkeletonCU(CUNode); - } } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(Asm->getObjFileLowering().getTextSection()); + SectionMap[Asm->getObjFileLowering().getTextSection()]; } // Attach DW_AT_inline attribute with inlined subprogram DIEs. @@ -911,21 +950,20 @@ void DwarfDebug::computeInlinedDIEs() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), AE = AbstractSPDies.end(); AI != AE; ++AI) { DIE *ISP = AI->second; if (InlinedSubprogramDIEs.count(ISP)) continue; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } } // Collect info for variables that were optimized out. 
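// (Aside: a variable is "dead" in this sense when its enclosing subprogram
// produced no code -- it was optimized away or never emitted -- so no
// DBG_VALUE history exists for it. It still receives a DW_TAG_variable DIE,
// just without any DW_AT_location, so a debugger can report it as optimized
// out instead of omitting it entirely. A sketch of the resulting DWARF,
// under that assumption and with a hypothetical function name:
//
//   DW_TAG_subprogram
//     DW_AT_name        "unused_helper"
//     DW_TAG_variable
//       DW_AT_name      "x"
//       DW_AT_type      <ref to base type>
//       (no DW_AT_location attribute)
// )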
void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); - DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap; if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { @@ -933,37 +971,38 @@ void DwarfDebug::collectDeadVariables() { DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); - if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.isSubprogram()) continue; - if (!SP.isDefinition()) continue; + if (ProcessedSPNodes.count(SP) != 0) + continue; + if (!SP.isSubprogram()) + continue; + if (!SP.isDefinition()) + continue; DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) continue; - - LexicalScope *Scope = - new LexicalScope(NULL, DIDescriptor(SP), NULL, false); - DeadFnScopeMap[SP] = Scope; + if (Variables.getNumElements() == 0) + continue; // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); + // FIXME: See the comment in constructSubprogramDIE about duplicate + // subprogram DIEs. constructSubprogramDIE(SPCU, SP); - DIE *ScopeDIE = SPCU->getDIE(SP); + DIE *SPDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.isVariable()) continue; - DbgVariable NewVar(DV, NULL); + if (!DV.isVariable()) + continue; + DbgVariable NewVar(DV, NULL, this); if (DIE *VariableDIE = - SPCU->constructVariableDIE(&NewVar, Scope->isAbstractScope())) - ScopeDIE->addChild(VariableDIE); + SPCU->constructVariableDIE(NewVar, false)) + SPDIE->addChild(VariableDIE); } } } } - DeleteContainerSeconds(DeadFnScopeMap); } -// Type Signature [7.27] computation code. -typedef ArrayRef<uint8_t> HashValue; +// Type Signature [7.27] and ODR Hash code. /// \brief Grabs the string in whichever attribute is passed in and returns /// a reference to it. Returns "" if the attribute doesn't exist. @@ -976,100 +1015,6 @@ static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { return StringRef(""); } -/// \brief Adds the string in \p Str to the hash in \p Hash. This also hashes -/// a trailing NULL with the string. -static void addStringToHash(MD5 &Hash, StringRef Str) { - DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); - Hash.update(Str); - Hash.update(makeArrayRef((uint8_t)'\0')); -} - -// FIXME: These are copied and only slightly modified out of LEB128.h. - -/// \brief Adds the unsigned in \p N to the hash in \p Hash. This also encodes -/// the unsigned as a ULEB128. -static void addULEB128ToHash(MD5 &Hash, uint64_t Value) { - DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - if (Value != 0) - Byte |= 0x80; // Mark this byte to show that more bytes will follow. - Hash.update(Byte); - } while (Value != 0); -} - -/// \brief Including \p Parent adds the context of Parent to \p Hash. -static void addParentContextToHash(MD5 &Hash, DIE *Parent) { - - DEBUG(dbgs() << "Adding parent context to hash...\n"); - - // [7.27.2] For each surrounding type or namespace beginning with the - // outermost such construct... 
- SmallVector<DIE *, 1> Parents; - while (Parent->getTag() != dwarf::DW_TAG_compile_unit) { - Parents.push_back(Parent); - Parent = Parent->getParent(); - } - - // Reverse iterate over our list to go from the outermost construct to the - // innermost. - for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(), - E = Parents.rend(); - I != E; ++I) { - DIE *Die = *I; - - // ... Append the letter "C" to the sequence... - addULEB128ToHash(Hash, 'C'); - - // ... Followed by the DWARF tag of the construct... - addULEB128ToHash(Hash, Die->getTag()); - - // ... Then the name, taken from the DW_AT_name attribute. - StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); - DEBUG(dbgs() << "... adding context: " << Name << "\n"); - if (!Name.empty()) - addStringToHash(Hash, Name); - } -} - -/// This is based on the type signature computation given in section 7.27 of the -/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE with -/// the exception that we are hashing only the context and the name of the type. -static void addDIEODRSignature(MD5 &Hash, CompileUnit *CU, DIE *Die) { - - // Add the contexts to the hash. We won't be computing the ODR hash for - // function local types so it's safe to use the generic context hashing - // algorithm here. - // FIXME: If we figure out how to account for linkage in some way we could - // actually do this with a slight modification to the parent hash algorithm. - DIE *Parent = Die->getParent(); - if (Parent) - addParentContextToHash(Hash, Parent); - - // Add the current DIE information. - - // Add the DWARF tag of the DIE. - addULEB128ToHash(Hash, Die->getTag()); - - // Add the name of the type to the hash. - addStringToHash(Hash, getDIEStringAttr(Die, dwarf::DW_AT_name)); - - // Now get the result. - MD5::MD5Result Result; - Hash.final(Result); - - // ... take the least significant 8 bytes and store those as the attribute. - // Our MD5 implementation always returns its results in little endian, swap - // bytes appropriately. - uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8); - - // FIXME: This should be added onto the type unit, not the type, but this - // works as an intermediate stage. - CU->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, dwarf::DW_FORM_data8, - Signature); -} - /// Return true if the current DIE is contained within an anonymous namespace. static bool isContainedInAnonNamespace(DIE *Die) { DIE *Parent = Die->getParent(); @@ -1090,7 +1035,7 @@ static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && !isContainedInAnonNamespace(Die); - } +} void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. @@ -1099,43 +1044,102 @@ void DwarfDebug::finalizeModuleInfo() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. computeInlinedDIEs(); - // Emit DW_AT_containing_type attribute to connect types with their - // vtable holding type. - for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); CUI != CUE; ++CUI) { - CompileUnit *TheCU = CUI->second; - TheCU->constructContainingTypeDIEs(); - } - // Split out type units and conditionally add an ODR tag to the split // out type. // FIXME: Do type splitting. 
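// (Aside: a minimal sketch of the signature computation that DIEHash now
// encapsulates, distilled from the helpers deleted above. It ULEB128-encodes
// the tag, hashes the name with a trailing NUL, and keeps the last 8 bytes
// of the MD5 digest read little-endian; the exact headers named here are an
// assumption about what a standalone version would need:
//
//   #include "llvm/ADT/ArrayRef.h"
//   #include "llvm/ADT/StringRef.h"
//   #include "llvm/Support/Endian.h"
//   #include "llvm/Support/MD5.h"
//   #include <cstdint>
//
//   // ULEB128-encode Value into the hash, as the deleted helper did.
//   static void hashULEB128(llvm::MD5 &Hash, uint64_t Value) {
//     do {
//       uint8_t Byte = Value & 0x7f;
//       Value >>= 7;
//       if (Value != 0)
//         Byte |= 0x80; // mark that more bytes follow
//       Hash.update(Byte);
//     } while (Value != 0);
//   }
//
//   // Hash one tag/name pair and return the low 8 bytes of the digest.
//   static uint64_t signatureForTagAndName(uint16_t Tag, llvm::StringRef Name) {
//     llvm::MD5 Hash;
//     hashULEB128(Hash, Tag);
//     Hash.update(Name);
//     Hash.update(llvm::makeArrayRef((uint8_t)'\0')); // trailing NUL
//     llvm::MD5::MD5Result Result;
//     Hash.final(Result);
//     return *reinterpret_cast<llvm::support::ulittle64_t *>(Result + 8);
//   }
// )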
for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { - MD5 Hash; DIE *Die = TypeUnits[i]; + DIEHash Hash; // If we've requested ODR hashes and it's applicable for an ODR hash then // add the ODR signature now. + // FIXME: This should be added onto the type unit, not the type, but this + // works as an intermediate stage. if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) - addDIEODRSignature(Hash, CUMap.begin()->second, Die); + CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, + dwarf::DW_FORM_data8, + Hash.computeDIEODRSignature(*Die)); } - // Compute DIE offsets and sizes. + // Handle anything that needs to be done on a per-cu basis. + for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), + CUE = CUMap.end(); + CUI != CUE; ++CUI) { + CompileUnit *TheCU = CUI->second; + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. + TheCU->constructContainingTypeDIEs(); + + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + uint64_t ID = 0; + if (GenerateCUHash) { + DIEHash CUHash; + ID = CUHash.computeCUSignature(*TheCU->getCUDie()); + } + // This should be a unique identifier when we want to build .dwp files. + TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + // Now construct the skeleton CU associated. + CompileUnit *SkCU = constructSkeletonCU(TheCU); + // This should be a unique identifier when we want to build .dwp files. + SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + } + } + + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) SkeletonHolder.computeSizeAndOffsets(); } void DwarfDebug::endSections() { - // Standard sections final addresses. - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end")); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); + // Filter labels by section. + for (size_t n = 0; n < ArangeLabels.size(); n++) { + const SymbolCU &SCU = ArangeLabels[n]; + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and its section. + const MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[NULL].push_back(SCU); + } + } - // End text sections. - for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { - Asm->OutStreamer.SwitchSection(SectionMap[I]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); + // Build a list of sections used. + std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Add terminating symbols for each section.
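// (Aside: the terminators make span construction simple -- within a section,
// each symbol's span runs to the next symbol in the sorted list, and the
// final null-symbol entry closes the last span. For a hypothetical .text
// section holding func_a then func_b:
//   entries:  func_a, func_b, <terminator>
//   spans:    [func_a, func_b), [func_b, end of .text)
// )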
+ for (unsigned ID = 0; ID < Sections.size(); ID++) { + const MCSection *Section = Sections[ID]; + MCSymbol *Sym = NULL; + + if (Section) { + // We can't call MCSection::getLabelEndName, as it's only safe to do so + // if we know the section name up-front. For user-created sections, the resulting + // label may not be valid to use as a label. (section names can use a greater + // set of characters on some systems) + Sym = Asm->GetTempSymbol("debug_end", ID); + Asm->OutStreamer.SwitchSection(Section); + Asm->OutStreamer.EmitLabel(Sym); + } + + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(NULL, Sym)); } } @@ -1152,6 +1156,8 @@ void DwarfDebug::endModule() { finalizeModuleInfo(); if (!useSplitDwarf()) { + emitDebugStr(); + // Emit all the DIEs into a debug info section. emitDebugInfo(); @@ -1170,15 +1176,12 @@ void DwarfDebug::endModule() { // Emit info into a debug macinfo section. emitDebugMacInfo(); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } else { // TODO: Fill this in for separated debug sections and separate // out information into new sections. + emitDebugStr(); + if (useSplitDwarf()) + emitDebugStrDWO(); // Emit the debug info section and compile units. emitDebugInfo(); @@ -1203,12 +1206,6 @@ void DwarfDebug::endModule() { // Emit DWO addresses. InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } // Emit info into the dwarf accelerator table sections. @@ -1219,20 +1216,11 @@ void DwarfDebug::endModule() { emitAccelTypes(); } - // Emit info into a debug pubnames section, if requested. - if (GenerateDwarfPubNamesSection) - emitDebugPubnames(); - - // Emit info into a debug pubtypes section. - // TODO: When we don't need the option anymore we can - // remove all of the code that adds to the table. - if (useDarwinGDBCompat()) - emitDebugPubTypes(); - - // Finally emit string information into a string table. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); + // Emit the pubnames and pubtypes sections if requested. + if (HasDwarfPubSections) { + emitDebugPubNames(GenerateGnuPubSections); + emitDebugPubTypes(GenerateGnuPubSections); + } // clean up.
SPMap.clear(); @@ -1262,7 +1250,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var, NULL); + AbsDbgVariable = new DbgVariable(Var, NULL, this); addScopeVariable(Scope, AbsDbgVariable); AbstractVariables[Var] = AbsDbgVariable; return AbsDbgVariable; @@ -1311,7 +1299,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, continue; DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); - DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable); + DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); RegVar->setFrameIndex(VP.first); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); @@ -1396,7 +1384,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, Processed.insert(DV); assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); - DbgVariable *RegVar = new DbgVariable(DV, AbsVar); + DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsVar) @@ -1459,7 +1447,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (!DV || !DV.isVariable() || !Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } @@ -1602,36 +1590,45 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) return; + + // If there's no debug info for the function we're not going to do anything. + if (!MMI->hasDebugInfo()) + return; + + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. LScopes.initialize(*MF); - if (LScopes.empty()) return; + if (LScopes.empty()) + return; + + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + + // Make sure that each lexical scope will have a begin/end label. identifyScopeMarkers(); // Set DwarfCompileUnitID in MCContext to the Compile Unit this function - // belongs to. + // belongs to so that we add to the correct per-cu line table in the + // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) // Use a single line table if we are using .loc and generating assembly. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - FunctionBeginSym = Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber()); + // Emit a label for the function so that we have a beginning address. + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. 
Asm->OutStreamer.EmitLabel(FunctionBeginSym); - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); + std::vector<const MDNode *> LiveUserVar(TRI->getNumRegs()); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { @@ -1642,22 +1639,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Keep track of user variables. const MDNode *Var = - MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); // Variable is in a register, we need to check for clobbers. if (isDbgValueInDefinedReg(MI)) LiveUserVar[MI->getOperand(0).getReg()] = Var; // Check the history of this variable. - SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var]; if (History.empty()) { UserVariables.push_back(Var); // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. DIVariable DV(Var); if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(getDISubprogram(DV.getContext())) - .describes(MF->getFunction())) + getDISubprogram(DV.getContext()).describes(MF->getFunction())) LabelsBeforeInsn[MI] = FunctionBeginSym; } else { // We have seen this variable before. Try to coalesce DBG_VALUEs. @@ -1667,8 +1663,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev - << "\t" << *History[History.size() - 2] << "\n"); + << "\t" << *Prev << "\t" + << *History[History.size() - 2] << "\n"); History.pop_back(); } @@ -1679,11 +1675,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Previous register assignment needs to terminate at the end of // its basic block. MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); + << "\t" << *Prev << "\n"); History.pop_back(); } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. @@ -1705,11 +1701,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Check if the instruction clobbers any registers with debug vars. 
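// (Aside: the effect of the scan below is that a register-based location
// range is closed at the first clobbering def. With hypothetical code:
//
//   DBG_VALUE %reg, "x"   ; "x" lives in %reg from here
//   ...
//   %reg = ADD ...        ; def of %reg: this instruction is appended to
//                         ; the history for "x" as an end marker
//
// so the location list entry for "x" covers only the instructions between
// the DBG_VALUE and the clobber.)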
for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid(); + ++AI) { unsigned Reg = *AI; const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1721,7 +1718,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); if (HistI == DbgValues.end()) continue; - SmallVectorImpl<const MachineInstr*> &History = HistI->second; + SmallVectorImpl<const MachineInstr *> &History = HistI->second; if (History.empty()) continue; const MachineInstr *Prev = History.back(); @@ -1743,7 +1740,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); I != E; ++I) { - SmallVectorImpl<const MachineInstr*> &History = I->second; + SmallVectorImpl<const MachineInstr *> &History = I->second; if (History.empty()) continue; @@ -1752,7 +1749,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { const MachineBasicBlock *PrevMBB = Prev->getParent(); MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); @@ -1776,13 +1773,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc, - MF->getFunction()->getContext()); - recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + DebugLoc FnStartDL = + getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext()); + recordSourceLine( + FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + DWARF2_FLAG_IS_STMT); } } @@ -1855,7 +1853,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (AbstractVariables.lookup(CleanDV)) continue; if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) @@ -1924,7 +1922,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Compute the size and offset of a DIE. +// Compute the size and offset of a DIE. The offset is relative to start of the +// CU. It returns the offset after laying out the DIE. unsigned DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the children. @@ -1935,7 +1934,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the abbreviation for this DIE. 
unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; // Set DIE offset Die->setOffset(Offset); @@ -1967,19 +1966,23 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { return Offset; } -// Compute the size and offset of all the DIEs. +// Compute the size and offset for each DIE. void DwarfUnits::computeSizeAndOffsets() { - // Offset from the beginning of debug info section. + // Offset from the first CU in the debug info section is 0 initially. unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { (*I)->setDebugInfoOffset(SecOffset); - unsigned Offset = - sizeof(int32_t) + // Length of Compilation Unit Info - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + // CU-relative offset is reset to 0 here. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + (*I)->getHeaderSize(); // Unit-specific headers + + // EndOffset here is CU-relative, after laying out + // all of the CU DIE. unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); SecOffset += EndOffset; } @@ -2006,9 +2009,16 @@ void DwarfDebug::emitSectionLabels() { DwarfLineSectionSym = emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); emitSectionSym(Asm, TLOF.getDwarfLocSection()); - if (GenerateDwarfPubNamesSection) + if (GenerateGnuPubSections) { + DwarfGnuPubNamesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); + DwarfGnuPubTypesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); + } else if (HasDwarfPubSections) { emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + } + DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); if (useSplitDwarf()) { @@ -2028,10 +2038,10 @@ void DwarfDebug::emitSectionLabels() { } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { +void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1]; // Emit the code (index) for the abbreviation. if (Asm->isVerbose()) @@ -2046,27 +2056,44 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { // Emit the DIE attribute values. 
for (unsigned i = 0, N = Values.size(); i < N; ++i) {
-    unsigned Attr = AbbrevData[i].getAttribute();
-    unsigned Form = AbbrevData[i].getForm();
+    dwarf::Attribute Attr = AbbrevData[i].getAttribute();
+    dwarf::Form Form = AbbrevData[i].getForm();
     assert(Form && "Too many attributes for DIE (check abbreviation)");
     if (Asm->isVerbose())
       Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
     switch (Attr) {
-    case dwarf::DW_AT_abstract_origin: {
+    case dwarf::DW_AT_abstract_origin:
+    case dwarf::DW_AT_type:
+    case dwarf::DW_AT_friend:
+    case dwarf::DW_AT_specification:
+    case dwarf::DW_AT_import:
+    case dwarf::DW_AT_containing_type: {
       DIEEntry *E = cast<DIEEntry>(Values[i]);
       DIE *Origin = E->getEntry();
       unsigned Addr = Origin->getOffset();
       if (Form == dwarf::DW_FORM_ref_addr) {
+        assert(!useSplitDwarf() && "TODO: dwo files can't have relocations.");
         // For DW_FORM_ref_addr, output the offset from beginning of debug info
         // section. Origin->getOffset() returns the offset from start of the
         // compile unit.
-        DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
-        Addr += Holder.getCUOffset(Origin->getCompileUnit());
+        CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit());
+        assert(CU && "CUDie should belong to a CU.");
+        Addr += CU->getDebugInfoOffset();
+        if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+          Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr,
+                                   DIEEntry::getRefAddrSize(Asm));
+        else
+          Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr,
+                                         DwarfInfoSectionSym,
+                                         DIEEntry::getRefAddrSize(Asm));
+      } else {
+        // Make sure Origin belongs to the same CU.
+        assert(Die->getCompileUnit() == Origin->getCompileUnit() &&
+               "The referenced DIE should belong to the same CU in ref4");
+        Asm->EmitInt32(Addr);
       }
-      Asm->OutStreamer.EmitIntValue(Addr,
-          Form == dwarf::DW_FORM_ref_addr ? DIEEntry::getRefAddrSize(Asm) : 4);
       break;
     }
     case dwarf::DW_AT_ranges: {
@@ -2088,7 +2115,7 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
     case dwarf::DW_AT_location: {
       if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
         if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-          Asm->EmitLabelReference(L->getValue(), 4);
+          Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym);
         else
           Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
       } else {
@@ -2142,20 +2169,10 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
                                                   TheCU->getUniqueID()));
     // Emit size of content not including length itself
-    unsigned ContentSize = Die->getSize() +
-        sizeof(int16_t) + // DWARF version number
-        sizeof(int32_t) + // Offset Into Abbrev. Section
-        sizeof(int8_t);   // Pointer Size (in bytes)
+    Asm->OutStreamer.AddComment("Length of Unit");
+    Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize());
-    Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
-    Asm->EmitInt32(ContentSize);
-    Asm->OutStreamer.AddComment("DWARF version number");
-    Asm->EmitInt16(DD->getDwarfVersion());
-    Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
-    Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
-                           ASectionSym);
-    Asm->OutStreamer.AddComment("Address Size (in bytes)");
-    Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+    TheCU->emitHeader(ASection, ASectionSym);
     DD->emitDIE(Die, Abbreviations);
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
@@ -2163,19 +2180,6 @@ void DwarfUnits::emitUnits(DwarfDebug *DD,
   }
 }
-/// For a given compile unit DIE, returns offset from beginning of debug info.
-unsigned DwarfUnits::getCUOffset(DIE *Die) { - assert(Die->getTag() == dwarf::DW_TAG_compile_unit && - "Input DIE should be compile unit in getCUOffset."); - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - CompileUnit *TheCU = *I; - if (TheCU->getCUDie() == Die) - return TheCU->getDebugInfoOffset(); - } - llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2249,7 +2253,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2278,7 +2282,7 @@ void DwarfDebug::emitAccelNames() { // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2306,7 +2310,7 @@ void DwarfDebug::emitAccelObjC() { // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2335,11 +2339,11 @@ void DwarfDebug::emitAccelNamespaces() { // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { std::vector<DwarfAccelTable::Atom> Atoms; - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)); DwarfAccelTable AT(Atoms); for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), @@ -2367,23 +2371,85 @@ void DwarfDebug::emitAccelTypes() { AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitDebugPubnames - Emit visible names into a debug pubnames section. +// Public name handling. +// The format for the various pubnames: +// +// dwarf pubnames - offset/name pairs where the offset is the offset into the CU +// for the DIE that is named. +// +// gnu pubnames - offset/index value/name tuples where the offset is the offset +// into the CU and the index value is computed according to the type of value +// for the DIE that is named. +// +// For type units the offset is the offset of the skeleton DIE. For split dwarf +// it's the offset within the debug_info/debug_types dwo section, however, the +// reference in the pubname header doesn't change. 
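For illustration, here is a minimal self-contained C++ sketch of the two entry layouts just described: a plain pubnames entry is {4-byte CU-relative DIE offset, NUL-terminated name}, while the gnu variant inserts a one-byte index value between them. The enum values and the bit packing (kind in bits 4-6, static/external flag in bit 7) follow the GDB index convention these sections target; every name below is a stand-in, not LLVM's API.

#include <cstdint>
#include <string>
#include <vector>

// Stand-ins for dwarf::GDBIndexEntryKind / dwarf::GDBIndexEntryLinkage.
enum Kind : uint8_t { None = 0, Type = 1, Variable = 2, Function = 3 };
enum Linkage : uint8_t { External = 0, Static = 1 };

// Pack the gnu index value: kind in bits 4-6, linkage flag in bit 7
// (assumed layout, mirroring what a toBits()-style helper would produce).
static uint8_t packIndexValue(Kind K, Linkage L) {
  return static_cast<uint8_t>((L << 7) | (K << 4));
}

// Append one entry to a byte buffer standing in for the output stream.
static void emitEntry(std::vector<uint8_t> &Out, uint32_t DieOffset,
                      const std::string &Name, bool GnuStyle, Kind K,
                      Linkage L) {
  for (int i = 0; i < 4; ++i) // 4-byte DIE offset, little-endian here
    Out.push_back(static_cast<uint8_t>(DieOffset >> (8 * i)));
  if (GnuStyle)
    Out.push_back(packIndexValue(K, L)); // the extra index-value byte
  Out.insert(Out.end(), Name.begin(), Name.end());
  Out.push_back(0); // names are NUL-terminated in the section
}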
+
+/// computeIndexValue - Compute the gdb index value for the DIE and CU.
+static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU,
+                                                        DIE *Die) {
+  dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
+
+  // We could have a specification DIE that has most of our knowledge;
+  // look for that now.
+  DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification);
+  if (SpecVal) {
+    DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry();
+    if (SpecDIE->findAttribute(dwarf::DW_AT_external))
+      Linkage = dwarf::GIEL_EXTERNAL;
+  } else if (Die->findAttribute(dwarf::DW_AT_external))
+    Linkage = dwarf::GIEL_EXTERNAL;
+
+  switch (Die->getTag()) {
+  case dwarf::DW_TAG_class_type:
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_enumeration_type:
+    return dwarf::PubIndexEntryDescriptor(
+        dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
+                              ? dwarf::GIEL_STATIC
+                              : dwarf::GIEL_EXTERNAL);
+  case dwarf::DW_TAG_typedef:
+  case dwarf::DW_TAG_base_type:
+  case dwarf::DW_TAG_subrange_type:
+    return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
+  case dwarf::DW_TAG_namespace:
+    return dwarf::GIEK_TYPE;
+  case dwarf::DW_TAG_subprogram:
+    return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
+  case dwarf::DW_TAG_constant:
+  case dwarf::DW_TAG_variable:
+    return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
+  case dwarf::DW_TAG_enumerator:
+    return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
+                                          dwarf::GIEL_STATIC);
+  default:
+    return dwarf::GIEK_NONE;
+  }
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
 ///
-void DwarfDebug::emitDebugPubnames() {
+void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
   const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+  const MCSection *PSec =
+      GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+               : Asm->getObjFileLowering().getDwarfPubNamesSection();
   typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
   for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
     CompileUnit *TheCU = I->second;
     unsigned ID = TheCU->getUniqueID();
-    if (TheCU->getGlobalNames().empty())
-      continue;
-    // Start the dwarf pubnames section.
-    Asm->OutStreamer.SwitchSection(
-        Asm->getObjFileLowering().getDwarfPubNamesSection());
+    Asm->OutStreamer.SwitchSection(PSec);
+
+    // Emit a label so we can reference the beginning of this pubname section.
+    if (GnuStyle)
+      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames",
+                                                    TheCU->getUniqueID()));
+
+    // Emit the header.
     Asm->OutStreamer.AddComment("Length of Public Names Info");
     Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
                              Asm->GetTempSymbol("pubnames_begin", ID), 4);
@@ -2391,7 +2457,7 @@ void DwarfDebug::emitDebugPubnames() {
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
     Asm->OutStreamer.AddComment("DWARF Version");
-    Asm->EmitInt16(DwarfVersion);
+    Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
     Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
     Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
@@ -2402,15 +2468,24 @@ void DwarfDebug::emitDebugPubnames() {
                              Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
                              4);
+
+    // Emit the pubnames for this compilation unit.
const StringMap<DIE*> &Globals = TheCU->getGlobalNames(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - const DIE *Entity = GI->second; + DIE *Entity = GI->second; Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); @@ -2422,55 +2497,78 @@ void DwarfDebug::emitDebugPubnames() { } } -void DwarfDebug::emitDebugPubTypes() { +void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { + E = CUMap.end(); + I != E; ++I) { CompileUnit *TheCU = I->second; // Start the dwarf pubtypes section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubTypesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubtype section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", + TheCU->getUniqueID())); + + // Emit the header. Asm->OutStreamer.AddComment("Length of Public Types Info"); Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID())); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(DwarfVersion); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - DwarfInfoSectionSym); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), + DwarfInfoSectionSym); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), - TheCU->getUniqueID()), - Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - 4); - - const StringMap<DIE*> &Globals = TheCU->getGlobalTypes(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + Asm->EmitLabelDifference( + Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()), + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4); + + // Emit the pubtypes. 
+ const StringMap<DIE *> &Globals = TheCU->getGlobalTypes(); + for (StringMap<DIE *>::const_iterator GI = Globals.begin(), + GE = Globals.end(); + GI != GE; ++GI) { const char *Name = GI->getKeyData(); DIE *Entity = GI->second; - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID())); } } @@ -2649,18 +2747,178 @@ void DwarfDebug::emitDebugLoc() { } } -// Emit visible names into a debug aranges section. +struct SymbolCUSorter { + SymbolCUSorter(const MCStreamer &s) : Streamer(s) {} + const MCStreamer &Streamer; + + bool operator() (const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + IA = (unsigned)(-1); + if (IB == 0) + IB = (unsigned)(-1); + return IA < IB; + } +}; + +static bool CUSort(const CompileUnit *A, const CompileUnit *B) { + return (A->getUniqueID() < B->getUniqueID()); +} + +struct ArangeSpan { + const MCSymbol *Start, *End; +}; + +// Emit a debug aranges section, containing a CU lookup for any +// address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection()); + + typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType; + + SpansType Spans; + + // Build a list of sections used. + std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Build a set of address spans, sorted by CU. + for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) { + const MCSection *Section = Sections[SecIdx]; + SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + if (List.size() < 2) + continue; + + // Sort the symbols by offset within the section. + SymbolCUSorter sorter(Asm->OutStreamer); + std::sort(List.begin(), List.end(), sorter); + + // If we have no section (e.g. common), just write out + // individual spans for each symbol. 
+    if (Section == NULL) {
+      for (size_t n = 0; n < List.size(); n++) {
+        const SymbolCU &Cur = List[n];
+
+        ArangeSpan Span;
+        Span.Start = Cur.Sym;
+        Span.End = NULL;
+        if (Cur.CU)
+          Spans[Cur.CU].push_back(Span);
+      }
+    } else {
+      // Build spans between each label.
+      const MCSymbol *StartSym = List[0].Sym;
+      for (size_t n = 1; n < List.size(); n++) {
+        const SymbolCU &Prev = List[n - 1];
+        const SymbolCU &Cur = List[n];
+
+        // Try and build the longest span we can within the same CU.
+        if (Cur.CU != Prev.CU) {
+          ArangeSpan Span;
+          Span.Start = StartSym;
+          Span.End = Cur.Sym;
+          Spans[Prev.CU].push_back(Span);
+          StartSym = Cur.Sym;
+        }
+      }
+    }
+  }
+
+  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+  unsigned PtrSize = Asm->getDataLayout().getPointerSize();
+
+  // Build a list of CUs used.
+  std::vector<CompileUnit *> CUs;
+  for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) {
+    CompileUnit *CU = it->first;
+    CUs.push_back(CU);
+  }
+
+  // Sort the CU list (again, to ensure consistent output order).
+  std::sort(CUs.begin(), CUs.end(), CUSort);
+
+  // Emit an arange table for each CU we used.
+  for (size_t CUIdx = 0; CUIdx < CUs.size(); CUIdx++) {
+    CompileUnit *CU = CUs[CUIdx];
+    std::vector<ArangeSpan> &List = Spans[CU];
+
+    // Emit size of content not including length itself.
+    unsigned ContentSize
+        = sizeof(int16_t)  // DWARF ARange version number
+        + sizeof(int32_t)  // Offset of CU in the .debug_info section
+        + sizeof(int8_t)   // Pointer Size (in bytes)
+        + sizeof(int8_t);  // Segment Size (in bytes)
+
+    unsigned TupleSize = PtrSize * 2;
+
+    // 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
+    unsigned Padding = 0;
+    while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0)
+      Padding++;
+
+    ContentSize += Padding;
+    ContentSize += (List.size() + 1) * TupleSize;
+
+    // For each compile unit, write the list of spans it covers.
+    Asm->OutStreamer.AddComment("Length of ARange Set");
+    Asm->EmitInt32(ContentSize);
+    Asm->OutStreamer.AddComment("DWARF Arange version number");
+    Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
+    Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
+    Asm->EmitSectionOffset(
+        Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()),
+        DwarfInfoSectionSym);
+    Asm->OutStreamer.AddComment("Address Size (in bytes)");
+    Asm->EmitInt8(PtrSize);
+    Asm->OutStreamer.AddComment("Segment Size (in bytes)");
+    Asm->EmitInt8(0);
+
+    for (unsigned n = 0; n < Padding; n++)
+      Asm->EmitInt8(0xff);
+
+    for (unsigned n = 0; n < List.size(); n++) {
+      const ArangeSpan &Span = List[n];
+      Asm->EmitLabelReference(Span.Start, PtrSize);
+
+      // Calculate the size as being from the span start to its end.
+      if (Span.End) {
+        Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize);
+      } else {
+        // For symbols without an end marker (e.g. common), we
+        // write a single arange entry containing just that one symbol.
+        uint64_t Size = SymSize[Span.Start];
+        if (Size == 0)
+          Size = 1;
+
+        Asm->OutStreamer.EmitIntValue(Size, PtrSize);
+      }
+    }
+
+    Asm->OutStreamer.AddComment("ARange terminator");
+    Asm->OutStreamer.EmitIntValue(0, PtrSize);
+    Asm->OutStreamer.EmitIntValue(0, PtrSize);
+  }
 }

 // Emit visible names into a debug ranges section.
 void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
- Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVectorImpl<const MCSymbol *>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); @@ -2681,103 +2939,19 @@ void DwarfDebug::emitDebugMacInfo() { } } -// Emit inline info using following format. -// Section Header: -// 1. length of section -// 2. Dwarf version number -// 3. address size. -// -// Entries (one "entry" for each function that was inlined): -// -// 1. offset into __debug_str section for MIPS linkage name, if exists; -// otherwise offset into __debug_str for regular function name. -// 2. offset into __debug_str section for regular function name. -// 3. an unsigned LEB128 number indicating the number of distinct inlining -// instances for the function. -// -// The rest of the entry consists of a {die_offset, low_pc} pair for each -// inlined instance; the die_offset points to the inlined_subroutine die in the -// __debug_info section, and the low_pc is the starting address for the -// inlining instance. -void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUseInlineInfoSection()) - return; - - if (!FirstCU) - return; - - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfDebugInlineSection()); - - Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1), - Asm->GetTempSymbol("debug_inlined_begin", 1), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1)); - - Asm->OutStreamer.AddComment("Dwarf Version"); - Asm->EmitInt16(DwarfVersion); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - - for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(), - E = InlinedSPNodes.end(); I != E; ++I) { - - const MDNode *Node = *I; - InlineInfoMap::iterator II = InlineInfo.find(Node); - SmallVectorImpl<InlineInfoLabels> &Labels = II->second; - DISubprogram SP(Node); - StringRef LName = SP.getLinkageName(); - StringRef Name = SP.getName(); - - Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - else - Asm->EmitSectionOffset( - InfoHolder.getStringPoolEntry(Function::getRealLinkageName(LName)), - DwarfStrSectionSym); - - Asm->OutStreamer.AddComment("Function name"); - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - Asm->EmitULEB128(Labels.size(), "Inline count"); - - for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(), - LE = Labels.end(); LI != LE; ++LI) { - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(LI->second->getOffset()); - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); - Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize()); - } - } - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1)); -} - // DWARF5 Experimental Separate Dwarf emitters. // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present, -// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa. 
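Before the implementation, a schematic (non-LLVM) view of what the skeleton compile unit carries may help; the fields paraphrase the DW_AT_* attributes listed in the comment above, and the pairing of skeleton and .dwo file through a shared dwo_id hash is stated here as an assumption about how consumers match the two halves.

#include <cstdint>
#include <string>

// Schematic only: the skeleton CU stays in the object file and points at
// the .dwo file that holds the full debug info (DWARF5 split-dwarf).
struct SkeletonCUInfo {
  std::string CompDir;      // DW_AT_comp_dir
  std::string DwoName;      // DW_AT_GNU_dwo_name: path of the .dwo file
  uint64_t DwoId = 0;       // DW_AT_GNU_dwo_id: must match the .dwo's copy
  uint64_t StmtListOff = 0; // DW_AT_stmt_list: offset into .debug_line
  uint64_t LowPc = 0;       // DW_AT_low_pc (DW_AT_ranges covers the rest)
  uint64_t RangesBase = 0;  // DW_AT_GNU_ranges_base
  uint64_t AddrBase = 0;    // DW_AT_GNU_addr_base: base into .debug_addr
};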
-CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { - DICompileUnit DIUnit(N); - CompilationDir = DIUnit.getDirectory(); +// DW_AT_ranges_base, DW_AT_addr_base. +CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, N, Asm, - this, &SkeletonHolder); + CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(), + Asm, this, &SkeletonHolder); NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit.getSplitDebugFilename()); - - // This should be a unique identifier when we want to build .dwp files. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + CU->getNode().getSplitDebugFilename()); // Relocate to the beginning of the addr_base section, else 0 for the // beginning of the one for this compile unit. @@ -2804,6 +2978,35 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + // Flags to let the linker know we have emitted new style pubnames. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_sec_offset, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_sec_offset, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } + + // Flag if we've emitted any ranges and their location for the compile unit. + if (DebugRangeSymbols.size()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_ranges_base, + dwarf::DW_FORM_sec_offset, DwarfDebugRangeSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4, + 0); + } + SkeletonHolder.addUnit(NewCU); SkeletonCUs.push_back(NewCU); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index e14f9b1..cebac39 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -150,11 +150,12 @@ class DbgVariable { DbgVariable *AbsVar; // Corresponding Abstract variable, if any. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; + DwarfDebug *DD; public: // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV) + DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0) {} + FrameIndex(~0), DD(DD) {} // Accessors. DIVariable getVariable() const { return Var; } @@ -169,7 +170,7 @@ public: int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. 
-  unsigned getTag() const {
+  uint16_t getTag() const {
     if (Var.getTag() == dwarf::DW_TAG_arg_variable)
       return dwarf::DW_TAG_formal_parameter;
@@ -208,6 +209,11 @@ public:
     return Var.getAddrElement(i);
   }
   DIType getType() const;
+
+private:
+  /// resolve - Look in the DwarfDebug map for the MDNode that
+  /// corresponds to the reference.
+  template <typename T> T resolve(DIRef<T> Ref) const;
 };

 /// \brief Collects and handles information specific to a particular
@@ -220,7 +226,7 @@ class DwarfUnits {
   FoldingSet<DIEAbbrev> *AbbreviationsSet;

   // A list of all the unique abbreviations in use.
-  std::vector<DIEAbbrev *> *Abbreviations;
+  std::vector<DIEAbbrev *> &Abbreviations;

   // A pointer to all units in the section.
   SmallVector<CompileUnit *, 1> CUs;
@@ -243,7 +249,7 @@ class DwarfUnits {
 public:
   DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
-             std::vector<DIEAbbrev *> *A, const char *Pref,
+             std::vector<DIEAbbrev *> &A, const char *Pref,
              BumpPtrAllocator &DA)
       : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA),
         NextStringPoolNumber(0), StringPref(Pref), AddressPool(),
@@ -294,10 +300,13 @@ public:
   /// \brief Returns the address pool.
   AddrPool *getAddrPool() { return &AddressPool; }
+};

-  /// \brief for a given compile unit DIE, returns offset from beginning of
-  /// debug info.
-  unsigned getCUOffset(DIE *Die);
+/// \brief Helper used to pair up a symbol and its DWARF compile unit.
+struct SymbolCU {
+  SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+  const MCSymbol *Sym;
+  CompileUnit *CU;
 };

 /// \brief Collects and handles dwarf debug information.
@@ -320,6 +329,14 @@ class DwarfDebug {
   // Maps subprogram MDNode with its corresponding CompileUnit.
   DenseMap <const MDNode *, CompileUnit *> SPMap;

+  // Maps a CU DIE with its corresponding CompileUnit.
+  DenseMap <const DIE *, CompileUnit *> CUDieMap;
+
+  /// Maps MDNodes for the type system to the corresponding DIEs. These DIEs
+  /// can be shared across CUs; that is why we keep the map here instead
+  /// of in CompileUnit.
+  DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
+
   // Used to uniquely define abbreviations.
   FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -332,8 +349,15 @@ class DwarfDebug {
   // separated by a zero byte, mapped to a unique id.
   StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;

+  // List of all labels used in aranges generation.
+  std::vector<SymbolCU> ArangeLabels;
+
+  // Size of each symbol emitted (for those symbols that have a specific size).
+  DenseMap <const MCSymbol *, uint64_t> SymSize;
+
   // Provides a unique id per text section.
-  SetVector<const MCSection*> SectionMap;
+  typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
+  SectionMapType SectionMap;

   // List of arguments for current function.
   SmallVector<DbgVariable *, 8> CurrentFnArguments;
@@ -358,14 +382,6 @@ class DwarfDebug {
   // as DW_AT_inline.
   SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;

-  // Keep track of inlined functions and their location. This
-  // information is used to populate the debug_inlined section.
-  typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
-  typedef DenseMap<const MDNode *,
-                   SmallVector<InlineInfoLabels, 4> > InlineInfoMap;
-  InlineInfoMap InlineInfo;
-  SmallVector<const MDNode *, 4> InlinedSPNodes;
-
   // This is a collection of subprogram MDNodes that are processed to
   // create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; @@ -406,6 +422,7 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; + MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. @@ -420,9 +437,6 @@ class DwarfDebug { // Holders for the various debug information flags that we might need to // have exposed. See accessor functions below for description. - // Whether or not we're emitting info for older versions of gdb on darwin. - bool IsDarwinGDBCompat; - // Holder for imported entities. typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> ImportedEntityMap; @@ -431,12 +445,16 @@ class DwarfDebug { // Holder for types that are going to be extracted out into a type unit. std::vector<DIE *> TypeUnits; + // Whether to emit the pubnames/pubtypes sections. + bool HasDwarfPubSections; + + // Version of dwarf we're emitting. + unsigned DwarfVersion; + // DWARF5 Experimental Options bool HasDwarfAccelTables; bool HasSplitDwarf; - unsigned DwarfVersion; - // Separated Dwarf Variables // In general these will all be for bits that are left in the // original object file, rather than things that are meant @@ -454,6 +472,9 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; + // Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; + private: void addScopeVariable(LexicalScope *LS, DbgVariable *Var); @@ -465,11 +486,14 @@ private: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); + DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to check whether the DIE for a given Scope is going + /// to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. @@ -477,6 +501,9 @@ private: /// \brief Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE*> &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -528,10 +555,16 @@ private: void emitAccelTypes(); /// \brief Emit visible names into a debug pubnames section. - void emitDebugPubnames(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubNames(bool GnuStyle = false); /// \brief Emit visible types into a debug pubtypes section. - void emitDebugPubTypes(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubTypes(bool GnuStyle = false); /// \brief Emit visible names into a debug str section. 
void emitDebugStr(); @@ -555,7 +588,7 @@ private: /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const MDNode *); + CompileUnit *constructSkeletonCU(const CompileUnit *CU); /// \brief Emit the local split abbreviations. void emitSkeletonAbbrevs(const MCSection *); @@ -571,7 +604,7 @@ private: /// \brief Create new CompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(const MDNode *N); + CompileUnit *constructCompileUnit(DICompileUnit DIUnit); /// \brief Construct subprogram DIE. void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); @@ -633,7 +666,13 @@ public: // Main entry points. // DwarfDebug(AsmPrinter *A, Module *M); - ~DwarfDebug(); + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return MDTypeNodeToDieMap.lookup(TypeMD); + } /// \brief Emit all Dwarf sections that should come prior to the /// content. @@ -658,6 +697,13 @@ public: /// type units. void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + /// \brief Add a label so that arange data can be generated for it. + void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;} + /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. @@ -665,11 +711,7 @@ public: unsigned CUID); /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs); - - /// \brief Returns whether or not to limit some of our debug - /// output to the limitations of darwin gdb. - bool useDarwinGDBCompat() { return IsDarwinGDBCompat; } + void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs); // Experimental DWARF5 features. @@ -683,6 +725,16 @@ public: /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } + + /// Find the MDNode for the given reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return Ref.resolve(TypeIdentifierMap); + } + + /// isSubprogramContext - Return true if Context is either a subprogram + /// or another context nested inside a subprogram. + bool isSubprogramContext(const MDNode *Context); + }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 49a85d8..1575161 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -29,6 +29,7 @@ class MCAsmInfo; class MCExpr; class MCSymbol; class Function; +class ARMTargetStreamer; class AsmPrinter; //===----------------------------------------------------------------------===// @@ -177,6 +178,8 @@ public: class ARMException : public DwarfException { void EmitTypeInfos(unsigned TTypeEncoding); + ARMTargetStreamer &getTargetStreamer(); + public: //===--------------------------------------------------------------------===// // Main entry points. 
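The header changes above add one idea worth spelling out: a type reference is now either a direct node pointer or a string identifier that must be resolved through the DITypeIdentifierMap, which is what lets type DIEs be shared across CUs. A rough standalone sketch of that lookup, with STL stand-ins for MDNode and the DenseMap (illustrative, not LLVM's API):

#include <cassert>
#include <map>
#include <string>

struct Node { std::string Name; }; // stand-in for MDNode
using TypeIdentifierMap = std::map<std::string, Node *>;

// A reference is either resolved already (pointer) or deferred (string id).
struct TypeRef {
  Node *Direct = nullptr;
  std::string Id;
  Node *resolve(const TypeIdentifierMap &M) const {
    if (Direct)
      return Direct; // cheap path: already a pointer
    auto It = M.find(Id);
    assert(It != M.end() && "dangling type identifier");
    return It->second; // shared type nodes are found via the map
  }
};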
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index b48b817..24aa1ab 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -83,6 +83,8 @@ public: virtual unsigned getJumpBufAlignment() const; virtual unsigned getJumpBufSize() const; virtual bool shouldBuildLookupTables() const; + virtual bool haveFastSqrt(Type *Ty) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -111,6 +113,7 @@ public: ArrayRef<Type*> Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; + virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const; /// @} }; @@ -182,6 +185,14 @@ bool BasicTTI::shouldBuildLookupTables() const { TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } +bool BasicTTI::haveFastSqrt(Type *Ty) const { + const TargetLoweringBase *TLI = getTLI(); + EVT VT = TLI->getValueType(Ty); + return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); +} + +void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + //===----------------------------------------------------------------------===// // // Calls used by the vectorizers. @@ -443,12 +454,14 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; case Intrinsic::nearbyint: ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? @@ -498,3 +511,17 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const { unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 0; } + +unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const { + assert(Ty->isVectorTy() && "Expect a vector type"); + unsigned NumVecElts = Ty->getVectorNumElements(); + unsigned NumReduxLevels = Log2_32(NumVecElts); + unsigned ArithCost = NumReduxLevels * + TopTTI->getArithmeticInstrCost(Opcode, Ty); + // Assume the pairwise shuffles add a cost. 
+ unsigned ShuffleCost = + NumReduxLevels * (IsPairwise + 1) * + TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); + return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); +} diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 26bdca9..0d15ed7 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -1,4 +1,4 @@ -//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 56aa330..10cc9ff 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_library(LLVMCodeGen LiveRangeCalc.cpp LiveRangeEdit.cpp LiveRegMatrix.cpp + LiveRegUnits.cpp LiveStackAnalysis.cpp LiveVariables.cpp LocalStackSlotAllocation.cpp @@ -88,7 +89,6 @@ add_llvm_library(LLVMCodeGen ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp ShadowStackGC.cpp - ShrinkWrapping.cpp SjLjEHPrepare.cpp SlotIndexes.cpp SpillPlacement.cpp @@ -97,7 +97,7 @@ add_llvm_library(LLVMCodeGen StackColoring.cpp StackProtector.cpp StackSlotColoring.cpp - StrongPHIElimination.cpp + StackMaps.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp TargetInstrInfo.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index b03c325..4925c4d 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -9,14 +9,12 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -24,38 +22,22 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -char CalculateSpillWeights::ID = 0; -INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) - -void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<LiveIntervals>(); - au.addRequired<MachineBlockFrequencyInfo>(); - au.addRequired<MachineLoopInfo>(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); -} - -bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { - +void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, + MachineFunction &MF, + const MachineLoopInfo &MLI, + const MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo::NormalizingFn norm) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); - LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>(), - getAnalysis<MachineBlockFrequencyInfo>()); + VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) 
continue; - VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); + VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); } - return false; } // Return the preferred allocation register for reg, given a COPY instruction. @@ -111,7 +93,7 @@ static bool isRematerializable(const LiveInterval &LI, } void -VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { +VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; @@ -201,5 +183,5 @@ VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalizeSpillWeight(totalWeight, li.getSize()); + li.weight = normalize(totalWeight, li.getSize()); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index c641991..7430c53 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,6 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); - initializeCalculateSpillWeightsPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); @@ -60,7 +59,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); - initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 840a101..6619bcf 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -160,7 +160,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndItr) { assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); VLIWScheduler->startBlock(MBB); - VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, + std::distance(BeginItr, EndItr)); VLIWScheduler->schedule(); // Generate MI -> SU map. diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index e277f5c..031f19c 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -23,6 +23,7 @@ #define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -136,6 +137,12 @@ class ExeDepsFix : public MachineFunctionPass { typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; LiveOutMap LiveOuts; + /// List of undefined register reads in this block in forward order. + std::vector<std::pair<MachineInstr*, unsigned> > UndefReads; + + /// Storage for register unit liveness. + LiveRegUnits LiveUnits; + /// Current instruction number. /// The first instruction in each basic block is 0. 
int CurInstr;
@@ -185,6 +192,8 @@ private:
   void processDefs(MachineInstr*, bool Kill);
   void visitSoftInstr(MachineInstr*, unsigned mask);
   void visitHardInstr(MachineInstr*, unsigned domain);
+  bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
+  void processUndefReads(MachineBasicBlock*);
 };
 }
@@ -341,6 +350,10 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
   // Reset instruction counter in each basic block.
   CurInstr = 0;

+  // Set up UndefReads to track undefined register reads.
+  UndefReads.clear();
+  LiveUnits.clear();
+
   // Set up LiveRegs to represent registers entering MBB.
   if (!LiveRegs)
     LiveRegs = new LiveReg[NumRegs];
@@ -448,10 +461,46 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
   processDefs(MI, !DomP.first);
 }

+/// \brief Return true if it makes sense to break dependence on a partial def
+/// or undef use.
+bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+                                       unsigned Pref) {
+  int rx = regIndex(MI->getOperand(OpIdx).getReg());
+  if (rx < 0)
+    return false;
+
+  unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+  DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+
+  if (Pref > Clearance) {
+    DEBUG(dbgs() << ": Break dependency.\n");
+    return true;
+  }
+  // The current clearance seems OK, but we may be ignoring a def from a
+  // back-edge.
+  if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+    DEBUG(dbgs() << ": OK .\n");
+    return false;
+  }
+  // A def from an unprocessed back-edge may make us break this dependency.
+  DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+  return false;
+}
+
 // Update def-ages for registers defined by MI.
 // If Kill is set, also kill off DomainValues clobbered by the defs.
+//
+// Also break dependencies on partial defs and undef uses.
 void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
   assert(!MI->isDebugValue() && "Won't process debug values");
+
+  // Break dependence on undef uses. Do this before updating LiveRegs below.
+  unsigned OpNum;
+  unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI);
+  if (Pref) {
+    if (shouldBreakDependence(MI, OpNum, Pref))
+      UndefReads.push_back(std::make_pair(MI, OpNum));
+  }
   const MCInstrDesc &MCID = MI->getDesc();
   for (unsigned i = 0,
          e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
@@ -471,37 +520,58 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
     DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
                  << '\t' << *MI);

+    // Check clearance before partial register updates.
+    // Call breakDependence before setting LiveRegs[rx].Def.
+    unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+    if (Pref && shouldBreakDependence(MI, i, Pref))
+      TII->breakPartialRegDependency(MI, i, TRI);
+
     // How many instructions since rx was last written?
-    unsigned Clearance = CurInstr - LiveRegs[rx].Def;
     LiveRegs[rx].Def = CurInstr;

     // Kill off domains redefined by generic instructions.
     if (Kill)
       kill(rx);
+  }
+  ++CurInstr;
+}

-    // Verify clearance before partial register updates.
-    unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
-    if (!Pref)
-      continue;
-    DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
-    if (Pref > Clearance) {
-      DEBUG(dbgs() << ": Break dependency.\n");
-      TII->breakPartialRegDependency(MI, i, TRI);
-      continue;
-    }
-
-    // The current clearance seems OK, but we may be ignoring a def from a
-    // back-edge.
-    if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
-      DEBUG(dbgs() << ": OK.\n");
-      continue;
-    }
+/// \brief Break false dependencies on undefined register reads.
+///
+/// Walk the block backward computing precise liveness. This is expensive, so we
+/// only do it on demand. Note that the occurrence of undefined register reads
+/// that should be broken is very rare, but when they occur we may have many in
+/// a single block.
+void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
+  if (UndefReads.empty())
+    return;

-    // A def from an unprocessed back-edge may make us break this dependency.
-    DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+  // Collect this block's live out register units.
+  LiveUnits.init(TRI);
+  for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+       SE = MBB->succ_end(); SI != SE; ++SI) {
+    LiveUnits.addLiveIns(*SI, *TRI);
   }
+  MachineInstr *UndefMI = UndefReads.back().first;
+  unsigned OpIdx = UndefReads.back().second;

-  ++CurInstr;
+  for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
+       I != E; ++I) {
+    // Update liveness, including the current instruction's defs.
+    LiveUnits.stepBackward(*I, *TRI);
+
+    if (UndefMI == &*I) {
+      if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI))
+        TII->breakPartialRegDependency(UndefMI, OpIdx, TRI);
+
+      UndefReads.pop_back();
+      if (UndefReads.empty())
+        return;
+
+      UndefMI = UndefReads.back().first;
+      OpIdx = UndefReads.back().second;
+    }
+  }
 }

 // A hard instruction only works in one domain. All input registers will be
@@ -549,7 +619,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
     // Is it possible to use this collapsed register for free?
     if (dv->isCollapsed()) {
       // Restrict available domains to the ones in common with the operand.
-      // If there are no common domains, we must pay the cross-domain
+      // If there are no common domains, we must pay the cross-domain
       // penalty for this operand.
       if (common) available = common;
     } else if (common)
@@ -686,6 +756,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
       for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
            I != E; ++I)
         visitInstr(I);
+      processUndefReads(MBB);
       leaveBasicBlock(MBB);
     }
@@ -698,6 +769,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
            ++I)
         if (!I->isDebugValue())
           processDefs(I, false);
+      processUndefReads(MBB);
       leaveBasicBlock(MBB);
     }
@@ -713,6 +785,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
     delete[] FI->second;
   }
   LiveOuts.clear();
+  UndefReads.clear();
   Avail.clear();
   Allocator.DestroyAll();

diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 1611db8..6c73fff 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -104,7 +104,7 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
   }

   if (DstSubReg == InsReg) {
-    // No need to insert an identify copy instruction.
+    // No need to insert an identity copy instruction.
     // Watch out for case like this:
     // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
     // We must leave %RAX live.
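The shape of processUndefReads above is easy to restate outside LLVM's types: scan the block bottom-up, keep a live-register set seeded with the block's live-outs, and retire each pending undef read whose register is dead at that instruction. A simplified sketch under those assumptions (toy instruction type; undef reads are assumed not to appear in Uses):

#include <set>
#include <utility>
#include <vector>

struct Inst {
  std::vector<int> Defs, Uses; // register numbers
};

using PendingRead = std::pair<const Inst *, int /*Reg*/>;

// Pending reads arrive in forward block order, so back() is the first one
// met while walking backward.
void processUndefReads(const std::vector<Inst> &Block, std::set<int> Live,
                       std::vector<PendingRead> &UndefReads,
                       void (*breakDep)(const Inst &, int)) {
  for (auto I = Block.rbegin(), E = Block.rend();
       I != E && !UndefReads.empty(); ++I) {
    for (int R : I->Defs) Live.erase(R);  // defs end liveness going upward
    for (int R : I->Uses) Live.insert(R); // uses extend liveness upward
    if (&*I == UndefReads.back().first) {
      int Reg = UndefReads.back().second;
      if (!Live.count(Reg))
        breakDep(*I, Reg); // e.g. rewrite the read as a zeroing idiom
      UndefReads.pop_back();
    }
  }
}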
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 1ae7e3b..e2d0eb4 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -22,6 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -31,6 +33,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; // Hidden options for help debugging. @@ -150,14 +154,17 @@ namespace { /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. std::vector<BBInfo> BBAnalysis; + TargetSchedModel SchedModel; const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; + LiveRegUnits Redefs; + LiveRegUnits DontKill; + bool PreRegAlloc; bool MadeChange; int FnNum; @@ -198,11 +205,9 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, SmallSet<unsigned, 4> *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); @@ -267,7 +272,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = &MF.getRegInfo(); - InstrItins = MF.getTarget().getInstrItineraryData(); + + const TargetSubtargetInfo &ST = + MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); + if (!TII) return false; PreRegAlloc = MRI->isSSA(); @@ -666,32 +675,28 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(I); bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); - if (!isCondBr) { - if (!isPredicated) { - BBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, - &ExtraPredCost); - if (NumCycles > 1) - BBI.ExtraCost += NumCycles-1; - BBI.ExtraCost2 += ExtraPredCost; - } else if (!AlreadyPredicated) { - // FIXME: This instruction is already predicated before the - // if-conversion pass. It's probably something like a conditional move. - // Mark this block unpredicable for now. - BBI.IsUnpredicable = true; - return; - } + // A conditional branch is not predicable, but it may be eliminated. + if (isCondBr) + continue; + + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. 
+ BBI.IsUnpredicable = true; + return; } if (BBI.ClobbersPred && !isPredicated) { // Predicate modification instruction should end the block (except for // already predicated instructions and end of block branches). - if (isCondBr) { - // A conditional branch is not predicable, but it may be eliminated. - continue; - } - // Predicate may have been modified, the subsequent (currently) // unpredicated instructions cannot be correctly predicated. BBI.IsUnpredicable = true; @@ -961,64 +966,58 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } -/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are -/// modeled as read + write (sort like two-address instructions). These -/// routines track register liveness and add implicit uses to if-converted -/// instructions to conform to the model. -static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI) { - for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } -} - -static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI, - bool AddImpUse = false) { - SmallVector<unsigned, 4> Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) +/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all +/// values defined in MI which are not live/used by MI. +static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, + const TargetRegisterInfo *TRI) { + for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isKill()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned Reg = Ops->getReg(); + if (Reg == 0) continue; - if (MO.isDef()) - Defs.push_back(Reg); - else if (MO.isKill()) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - Redefs.erase(*SubRegs); - } + Redefs.removeReg(Reg, *TRI); } - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; - if (!Redefs.insert(Reg)) { - if (AddImpUse) - // Treat predicated update as read + write. - MIB.addReg(Reg, RegState::Implicit | RegState::Undef); - } else { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } + for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isDef()) + continue; + unsigned Reg = Ops->getReg(); + if (Reg == 0 || Redefs.contains(Reg, *TRI)) + continue; + Redefs.addReg(Reg, *TRI); + + MachineOperand &Op = *Ops; + MachineInstr *MI = Op.getParent(); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(Reg, RegState::Implicit | RegState::Undef); } } -static void UpdatePredRedefs(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallSet<unsigned,4> &Redefs, - const TargetRegisterInfo *TRI) { - while (I != E) { - UpdatePredRedefs(I, Redefs, TRI); - ++I; +/** + * Remove kill flags from operands with a register in the @p DontKill set.
+ */ +static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for (MIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->isKill()) + continue; + if (DontKill.contains(O->getReg(), MCRI)) + O->setIsKill(false); + } } +/** + * Walks a range of machine instructions and removes kill flags for registers + * in the @p DontKill set. + */ +static void RemoveKills(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for ( ; I != E; ++I) + RemoveKills(*I, DontKill, MCRI); +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -1049,21 +1048,27 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. + DontKill.init(TRI); + DontKill.addLiveIns(NextBBI->BB, *TRI); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(CvtBBI->BB); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1148,16 +1153,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet<unsigned, 4> Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != NULL; if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. @@ -1165,7 +1172,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1276,8 +1283,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions.
- SmallSet<unsigned, 4> Redefs; - InitPredRedefs(BBI1->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(BBI1->BB, *TRI); // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1304,7 +1311,19 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, --NumDups1; } - UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); + // Compute a set of registers which must not be killed by instructions in BB1: + // This is everything used+live in BB2 after the duplicated instructions. We + // can compute this set by simulating liveness backwards from the end of BB2. + DontKill.init(TRI); + for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), + E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { + DontKill.stepBackward(*I, *TRI); + } + + for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; + ++I) { + Redefs.stepForward(*I, *TRI); + } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1322,6 +1341,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } BBI1->BB->erase(DI1, BBI1->BB->end()); + // Kill flags in the true block for registers living into the false block + // must be removed. + RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); @@ -1380,10 +1403,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } // Predicate the 'true' block. - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); // Predicate the 'false' block. - PredicateBlock(*BBI2, DI2, *Cond2, Redefs); + PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. MergeBlocks(BBI, *BBI1, TailBB == 0); @@ -1458,7 +1481,6 @@ static bool MaySpeculate(const MachineInstr *MI, void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != 0; @@ -1484,7 +1506,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI, true); + UpdatePredRedefs(I, Redefs, TRI); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1501,7 +1523,6 @@ void IfConverter::PredicateBlock(BBInfo &BBI, /// the destination block. Skip end of block branches if IgnoreBr is true. 
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl<MachineOperand> &Cond, - SmallSet<unsigned, 4> &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); @@ -1514,8 +1535,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); if (NumCycles > 1) ToBBI.ExtraCost += NumCycles-1; ToBBI.ExtraCost2 += ExtraPredCost; @@ -1531,7 +1552,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI, true); + UpdatePredRedefs(MI, Redefs, TRI); + + // Some kill flags may not be correct anymore. + if (!DontKill.empty()) + RemoveKills(*MI, DontKill, *TRI); } if (!IgnoreBr) { diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 8910652..bb0e642 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -179,10 +179,8 @@ private: bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, SlotIndex, - MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex, MachineBasicBlock::iterator MI); + void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); void spillAroundUses(unsigned Reg); void spillAll(); @@ -580,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRangeQuery SrcQ(SrcLI, VNI->def); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); assert(SrcQ.valueIn() && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = SrcQ.isKill(); @@ -885,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } // Allocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI.
- SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM, TRI); + (void)DefIdx; DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -898,15 +896,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); MO.setIsKill(); } } - DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n'); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; } @@ -1009,6 +1004,40 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } +#if !defined(NDEBUG) +// Dump the range of instructions from B to E with their slot indexes. +static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, + MachineBasicBlock::iterator E, + LiveIntervals const &LIS, + const char *const header, + unsigned VReg = 0) { + char NextLine = '\n'; + char SlotIndent = '\t'; + + if (llvm::next(B) == E) { + NextLine = ' '; + SlotIndent = ' '; + } + + dbgs() << '\t' << header << ": " << NextLine; + + for (MachineBasicBlock::iterator I = B; I != E; ++I) { + SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot(); + + // If a register was passed in and this instruction has it as a + // destination that is marked as an early clobber, print the + // early-clobber slot index. + if (VReg) { + MachineOperand *MO = I->findRegisterDefOperand(VReg); + if (MO && MO->isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + + dbgs() << SlotIndent << Idx << '\t' << *I; + } +} +#endif + /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// @@ -1028,6 +1057,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP); + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; @@ -1039,7 +1071,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; } // FIXME: Teach targets to deal with subregs. - if (MO.getSubReg()) + if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) @@ -1049,6 +1081,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldOps.push_back(Idx); } + MachineInstrSpan MIS(MI); + MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); @@ -1075,16 +1109,24 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, // FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def"); for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { - if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - if (VNInfo *VNI = LI->getVNInfoAt(Idx)) - LI->removeValNo(VNI); + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); } } } + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); + // Insert any new instructions other than FoldMI into the LIS maps. + assert(!MIS.empty() && "Unexpected empty span of instructions!"); + for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); + MII != End; ++MII) + if (&*MII != FoldMI) + LIS.InsertMachineInstrInMaps(&*MII); + // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. if (ImpReg) @@ -1096,8 +1138,9 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, FoldMI->RemoveOperand(i - 1); } - DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' - << *FoldMI); + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, + "folded")); + if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) @@ -1107,36 +1150,35 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, return true; } -/// insertReload - Insert a reload of NewLI.reg before MI. -void InlineSpiller::insertReload(LiveInterval &NewLI, +void InlineSpiller::insertReload(unsigned NewVReg, SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - // Some (out-of-tree) targets have EC reload instructions. - if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) - if (MO->isEarlyClobber()) - LoadIdx = LoadIdx.getRegSlot(true); - DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + + MachineInstrSpan MIS(MI); + TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", + NewVReg)); ++NumReloads; } -/// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex Idx, MachineBasicBlock::iterator MI) { +/// insertSpill - Insert a spill of NewVReg after MI. +void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, + MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to store instruction. 
- SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + + MachineInstrSpan MIS(MI); + TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end()); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS, + "spill")); ++NumSpills; } @@ -1152,7 +1194,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - bool IsIndirect = MI->getOperand(1).isImm(); + bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); @@ -1212,19 +1254,18 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { if (foldMemoryOperand(Ops)) continue; - // Allocate interval around instruction. + // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Reg); if (RI.Reads) - insertReload(NewLI, Idx, MI); + insertReload(NewVReg, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); if (MO.isUse()) { if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); @@ -1233,21 +1274,12 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } - DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. - if (RI.Writes) { + if (RI.Writes) if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); - } - } - - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + insertSpill(NewVReg, true, MI); } } @@ -1266,8 +1298,8 @@ void InlineSpiller::spillAll() { assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), - StackInt->getValNumInfo(0)); + StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. 
@@ -1308,8 +1340,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() - << "\nFrom original " << LIS.getInterval(Original) << '\n'); + << ':' << edit.getParent() + << "\nFrom original " << PrintReg(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index a8e711e..427225d 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -204,11 +204,11 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Fixed interference. for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { LiveInterval::iterator &I = RegUnits[i].FixedI; - LiveInterval *LI = RegUnits[i].Fixed; - if (I == LI->end() || I->start >= Stop) + LiveRange *LR = RegUnits[i].Fixed; + if (I == LR->end() || I->start >= Stop) continue; - I = LI->advanceTo(I, Stop); - bool Backup = I == LI->end() || I->start >= Stop; + I = LR->advanceTo(I, Stop); + bool Backup = I == LR->end() || I->start >= Stop; if (Backup) --I; SlotIndex StopI = I->end; diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index c02fb9a..800f705 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -72,7 +72,7 @@ class InterferenceCache { unsigned VirtTag; /// Fixed interference in RegUnit. - LiveInterval *Fixed; + LiveRange *Fixed; /// Iterator pointing into the fixed RegUnit interference. LiveInterval::iterator FixedI; diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index d894f66..c38d4fb 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -485,11 +485,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - Type *IntPtr = TD.getIntPtrType(Context); + Value *Op0 = CI->getArgOperand(0); + Type *IntPtr = TD.getIntPtrType(Op0->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getArgOperand(0); + Ops[0] = Op0; // Extend the amount to i32. Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 6c9b2e5..ad2c553 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -175,12 +175,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; - MCAsmBackend *MAB = 0; - if (ShowMCEncoding) { + if (ShowMCEncoding) MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); - MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); - } + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), @@ -197,7 +196,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // emission fails. 
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -232,7 +231,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// addPassesToEmitMachineCode - Add passes to the specified pass manager to /// get machine code emitted. This uses a JITCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address -/// of functions. This method should returns true if machine code emission is +/// of functions. This method should return true if machine code emission is /// not supported. /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, @@ -271,7 +270,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Ctx); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); if (MCE == 0 || MAB == 0) return true; diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 85bed46..25645e0 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -131,7 +131,8 @@ class UserValue { /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. - bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); public: /// UserValue - Create a new UserValue. @@ -219,13 +220,13 @@ public: /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. - /// @param LI Restrict liveness to where LI has the value VNI. May be null. - /// @param VNI When LI is not null, this is the value to restrict to. + /// @param LR Restrict liveness to where LR has the value VNI. May be null. + /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -251,7 +252,8 @@ public: /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. - bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// rewriteLocations - Rewrite virtual register locations according to the /// provided virtual register map. @@ -345,7 +347,7 @@ public: void mapVirtReg(unsigned VirtReg, UserValue *EC); /// splitRegister - Replace all references to OldReg with NewRegs. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. 
void emitDebugValues(VirtRegMap *VRM); @@ -455,9 +457,10 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { } // Get or create the UserValue for (variable,offset). - bool IsIndirect = MI->getOperand(1).isImm(); + bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getOperand(2).getMetadata(); + //here. UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; @@ -492,7 +495,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { } void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -506,15 +509,15 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limit to VNI's live range. bool ToEnd = true; - if (LI && VNI) { - LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) { + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { if (Kills) Kills->push_back(Start); continue; } - if (Range->end < Stop) - Stop = Range->end, ToEnd = false; + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; } // There could already be a short def at Start. @@ -666,10 +669,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, // For physregs, use the live range of the first regunit as a guide. unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); - LiveInterval *LI = &LIS.getRegUnit(Unit); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + LiveRange *LR = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -729,7 +732,8 @@ LiveDebugVariables::~LiveDebugVariables() { //===----------------------------------------------------------------------===// bool -UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals& LIS) { DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; print(dbgs(), 0); @@ -738,7 +742,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { LocMap::iterator LocMapI; LocMapI.setMap(locInts); for (unsigned i = 0; i != NewRegs.size(); ++i) { - LiveInterval *LI = NewRegs[i]; + LiveInterval *LI = &LIS.getInterval(NewRegs[i]); if (LI->empty()) continue; @@ -827,7 +831,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { } bool -UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can // safely erase unused locations. 
@@ -836,15 +841,15 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { const MachineOperand *Loc = &locations[LocNo]; if (!Loc->isReg() || Loc->getReg() != OldReg) continue; - DidChange |= splitLocation(LocNo, NewRegs); + DidChange |= splitLocation(LocNo, NewRegs, LIS); } return DidChange; } -void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { bool DidChange = false; for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) - DidChange |= UV->splitRegister(OldReg, NewRegs); + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; @@ -852,11 +857,11 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { // Map all of the new virtual registers. UserValue *UV = lookupVirtReg(OldReg); for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i]->reg, UV); + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: -splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { if (pImpl) static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 3ce3c39..58a3f0f 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -27,6 +27,7 @@ namespace llvm { class LiveInterval; +class LiveIntervals; class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { @@ -47,7 +48,8 @@ public: /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes /// that happened during register allocation. diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 6be6bf3..2b8feb8 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -9,12 +9,12 @@ // // This file implements the LiveRange and LiveInterval classes. Given some // numbering of each of the machine instructions an interval [i, j) is said to be a -// live interval for register v if there is no instruction with number j' > j +// live range for register v if there is no instruction with number j' >= j // such that v is live at j' and there is no instruction with number i' < i such -// that v is live at i'. In this implementation intervals can have holes, -// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each -// individual range is represented as an instance of LiveRange, and the whole -// interval is represented as an instance of LiveInterval. +// that v is live at i'. In this implementation ranges can have holes, +// i.e. a range might look like [1,20), [50,65), [1000,1001). Each +// individual segment is represented as an instance of LiveRange::Segment, +// and the whole range is represented as an instance of LiveRange. // //===----------------------------------------------------------------------===// @@ -31,14 +31,14 @@ #include <algorithm> using namespace llvm; -LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { +LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound.
// Unfortunately, std::upper_bound cannot be used with mixed types until we // adopt C++0x. Many libraries can do it, but not all. if (empty() || Pos >= endIndex()) return end(); iterator I = begin(); - size_t Len = ranges.size(); + size_t Len = size(); do { size_t Mid = Len >> 1; if (Pos < I[Mid].end) @@ -49,13 +49,13 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { return I; } -VNInfo *LiveInterval::createDeadDef(SlotIndex Def, - VNInfo::Allocator &VNInfoAllocator) { +VNInfo *LiveRange::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { assert(!Def.isDead() && "Cannot define a value at the dead slot"); iterator I = find(Def); if (I == end()) { VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { @@ -73,11 +73,11 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } -// overlaps - Return true if the intersection of the two live intervals is +// overlaps - Return true if the intersection of the two live ranges is // not empty. // // An example for overlaps(): @@ -86,7 +86,7 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // 4: B = ... // 8: C = A + B ;; last use of A // -// The live intervals should look like: +// The live ranges should look like: // // A = [3, 11) // B = [7, x) @@ -95,9 +95,9 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // A->overlaps(C) should return false since we want to be able to join // A and C. // -bool LiveInterval::overlapsFrom(const LiveInterval& other, - const_iterator StartPos) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlapsFrom(const LiveRange& other, + const_iterator StartPos) const { + assert(!empty() && "empty range"); const_iterator i = begin(); const_iterator ie = end(); const_iterator j = StartPos; @@ -108,13 +108,13 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, if (i->start < j->start) { i = std::upper_bound(i, ie, j->start); - if (i != ranges.begin()) --i; + if (i != begin()) --i; } else if (j->start < i->start) { ++StartPos; if (StartPos != other.end() && StartPos->start <= i->start) { assert(StartPos < other.end() && i < end()); j = std::upper_bound(j, je, i->start); - if (j != other.ranges.begin()) --j; + if (j != other.begin()) --j; } } else { return true; @@ -136,10 +136,9 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } -bool LiveInterval::overlaps(const LiveInterval &Other, - const CoalescerPair &CP, - const SlotIndexes &Indexes) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty range"); if (Other.empty()) return false; @@ -178,9 +177,9 @@ bool LiveInterval::overlaps(const LiveInterval &Other, } } -/// overlaps - Return true if the live interval overlaps a range specified +/// overlaps - Return true if the live range overlaps an interval specified /// by [Start, End). 
-bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { +bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = std::lower_bound(begin(), end(), End); return I != begin() && (--I)->end > Start; @@ -190,7 +189,7 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { /// ValNo is dead, remove it. If it is the largest value number, just nuke it /// (and any other deleted values neighboring it), otherwise mark it as ~1U so /// it can be nuked later. -void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { +void LiveRange::markValNoForDeletion(VNInfo *ValNo) { if (ValNo->id == getNumValNums()-1) { do { valnos.pop_back(); @@ -202,137 +201,135 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { /// RenumberValues - Renumber all values in order of appearance and delete the /// remaining unused values. -void LiveInterval::RenumberValues(LiveIntervals &lis) { +void LiveRange::RenumberValues() { SmallPtrSet<VNInfo*, 8> Seen; valnos.clear(); for (const_iterator I = begin(), E = end(); I != E; ++I) { VNInfo *VNI = I->valno; if (!Seen.insert(VNI)) continue; - assert(!VNI->isUnused() && "Unused valno used by live range"); + assert(!VNI->isUnused() && "Unused valno used by live segment"); VNI->id = (unsigned)valnos.size(); valnos.push_back(VNI); } } -/// extendIntervalEndTo - This method is used when we want to extend the range -/// specified by I to end at the specified endpoint. To do this, we should -/// merge and eliminate all ranges that this will overlap with. The iterator is -/// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to end +/// at the specified endpoint. To do this, we should merge and eliminate all +/// segments that this will overlap with. The iterator is not invalidated. +void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = llvm::next(I); - for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + // Search for the first segment that we can't merge with. + iterator MergeTo = llvm::next(I); + for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } - // If NewEnd was in the middle of an interval, make sure to get its endpoint. + // If NewEnd was in the middle of a segment, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // If the newly formed range now touches the range after it and if they have - // the same value number, merge the two ranges into one range. - if (MergeTo != ranges.end() && MergeTo->start <= I->end && + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != end() && MergeTo->start <= I->end && MergeTo->valno == ValNo) { I->end = MergeTo->end; ++MergeTo; } - // Erase any dead ranges. - ranges.erase(llvm::next(I), MergeTo); + // Erase any dead segments. + segments.erase(llvm::next(I), MergeTo); } -/// extendIntervalStartTo - This method is used when we want to extend the range -/// specified by I to start at the specified endpoint. 
To do this, we should -/// merge and eliminate all ranges that this will overlap with. -LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to +/// start at the specified endpoint. To do this, we should merge and eliminate +/// all segments that this will overlap with. +LiveRange::iterator +LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = I; + // Search for the first segment that we can't merge with. + iterator MergeTo = I; do { - if (MergeTo == ranges.begin()) { + if (MergeTo == begin()) { I->start = NewStart; - ranges.erase(MergeTo, I); + segments.erase(MergeTo, I); return I; } assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); --MergeTo; } while (NewStart <= MergeTo->start); - // If we start in the middle of another interval, just delete a range and - // extend that interval. + // If we start in the middle of another segment, just delete a range and + // extend that segment. if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { MergeTo->end = I->end; } else { - // Otherwise, extend the interval right after. + // Otherwise, extend the segment right after. ++MergeTo; MergeTo->start = NewStart; MergeTo->end = I->end; } - ranges.erase(llvm::next(MergeTo), llvm::next(I)); + segments.erase(llvm::next(MergeTo), llvm::next(I)); return MergeTo; } -LiveInterval::iterator -LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - SlotIndex Start = LR.start, End = LR.end; - iterator it = std::upper_bound(From, ranges.end(), Start); +LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { + SlotIndex Start = S.start, End = S.end; + iterator it = std::upper_bound(From, end(), Start); - // If the inserted interval starts in the middle or right at the end of - // another interval, just extend that interval to contain the range of LR. - if (it != ranges.begin()) { + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (it != begin()) { iterator B = prior(it); - if (LR.valno == B->valno) { + if (S.valno == B->valno) { if (B->start <= Start && B->end >= Start) { - extendIntervalEndTo(B, End); + extendSegmentEndTo(B, End); return B; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(B->end <= Start && - "Cannot overlap two LiveRanges with differing ValID's" + "Cannot overlap two segments with differing ValID's" " (did you def the same reg twice in a MachineInstr?)"); } } - // Otherwise, if this range ends in the middle of, or right next to, another - // interval, merge it into that interval. - if (it != ranges.end()) { - if (LR.valno == it->valno) { + // Otherwise, if this segment ends in the middle of, or right next to, another + // segment, merge it into that segment. 
+ if (it != end()) { + if (S.valno == it->valno) { if (it->start <= End) { - it = extendIntervalStartTo(it, Start); + it = extendSegmentStartTo(it, Start); - // If LR is a complete superset of an interval, we may need to grow its + // If S is a complete superset of a segment, we may need to grow its // endpoint as well. if (End > it->end) - extendIntervalEndTo(it, End); + extendSegmentEndTo(it, End); return it; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(it->start >= End && - "Cannot overlap two LiveRanges with differing ValID's"); + "Cannot overlap two segments with differing ValID's"); } } - // Otherwise, this is just a new range that doesn't interact with anything. + // Otherwise, this is just a new segment that doesn't interact with anything. // Insert it. - return ranges.insert(it, LR); + return segments.insert(it, S); } -/// extendInBlock - If this interval is live before Kill in the basic +/// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. -VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { +VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) return 0; iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); @@ -342,20 +339,21 @@ VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (I->end <= StartIdx) return 0; if (I->end < Kill) - extendIntervalEndTo(I, Kill); + extendSegmentEndTo(I, Kill); return I->valno; } -/// removeRange - Remove the specified range from this interval. Note that -/// the range must be in a single LiveRange in its entirety. -void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, - bool RemoveDeadValNo) { - // Find the LiveRange containing this span. - Ranges::iterator I = find(Start); - assert(I != ranges.end() && "Range is not in interval!"); - assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); +/// Remove the specified segment from this range. Note that the segment must +/// be in a single Segment in its entirety. +void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the Segment containing this span. + iterator I = find(Start); + assert(I != end() && "Segment is not in range!"); + assert(I->containsInterval(Start, End) + && "Segment is not entirely in range!"); - // If the span we are removing is at the start of the LiveRange, adjust it. + // If the span we are removing is at the start of the Segment, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { @@ -373,54 +371,50 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, } } - ranges.erase(I); // Removed the whole LiveRange. + segments.erase(I); // Removed the whole Segment. } else I->start = End; return; } - // Otherwise if the span we are removing is at the end of the LiveRange, + // Otherwise if the span we are removing is at the end of the Segment, // adjust the other way. if (I->end == End) { I->end = Start; return; } - // Otherwise, we are splitting the LiveRange into two pieces. + // Otherwise, we are splitting the Segment into two pieces. SlotIndex OldEnd = I->end; - I->end = Start; // Trim the old interval. + I->end = Start; // Trim the old segment. // Insert the new one. 
- ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo)); + segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo)); } -/// removeValNo - Remove all the ranges defined by the specified value#. +/// removeValNo - Remove all the segments defined by the specified value#. /// Also remove the value# from value# list. -void LiveInterval::removeValNo(VNInfo *ValNo) { +void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - Ranges::iterator I = ranges.end(); - Ranges::iterator E = ranges.begin(); + iterator I = end(); + iterator E = begin(); do { --I; if (I->valno == ValNo) - ranges.erase(I); + segments.erase(I); } while (I != E); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } -/// join - Join two live intervals (this, and other) together. This applies -/// mappings to the value numbers in the LHS/RHS intervals as specified. If -/// the intervals are not joinable, this aborts. -void LiveInterval::join(LiveInterval &Other, - const int *LHSValNoAssignments, - const int *RHSValNoAssignments, - SmallVectorImpl<VNInfo *> &NewVNInfo, - MachineRegisterInfo *MRI) { +void LiveRange::join(LiveRange &Other, + const int *LHSValNoAssignments, + const int *RHSValNoAssignments, + SmallVectorImpl<VNInfo *> &NewVNInfo) { verify(); - // Determine if any of our live range values are mapped. This is uncommon, so - // we want to avoid the interval scan if not. + // Determine if any of our values are mapped. This is uncommon, so we want + // to avoid the range scan if not. bool MustMapCurValNos = false; unsigned NumVals = getNumValNums(); unsigned NumNewVals = NewVNInfo.size(); @@ -433,8 +427,7 @@ void LiveInterval::join(LiveInterval &Other, } } - // If we have to apply a mapping to our base interval assignment, rewrite it - // now. + // If we have to apply a mapping to our base range assignment, rewrite it now. if (MustMapCurValNos && !empty()) { // Map the first live range. @@ -445,12 +438,12 @@ void LiveInterval::join(LiveInterval &Other, assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, - // and if they are neighbors, remove one LiveRange. This happens when we + // and if they are neighbors, remove one Segment. This happens when we // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. if (OutIt->valno == nextValNo && OutIt->end == I->start) { OutIt->end = I->end; } else { - // Didn't merge. Move OutIt to the next interval, + // Didn't merge. Move OutIt to the next segment, ++OutIt; OutIt->valno = nextValNo; if (OutIt != I) { @@ -459,9 +452,9 @@ void LiveInterval::join(LiveInterval &Other, } } } - // If we merge some live ranges, chop off the end. + // If we merge some segments, chop off the end. ++OutIt; - ranges.erase(OutIt, end()); + segments.erase(OutIt, end()); } // Rewrite Other values before changing the VNInfo ids. @@ -472,7 +465,7 @@ void LiveInterval::join(LiveInterval &Other, I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this - // LiveInterval now. Also remove dead val#'s. + // LiveRange now. Also remove dead val#'s. unsigned NumValNos = 0; for (unsigned i = 0; i < NumNewVals; ++i) { VNInfo *VNI = NewVNInfo[i]; @@ -487,31 +480,31 @@ void LiveInterval::join(LiveInterval &Other, if (NumNewVals < NumVals) valnos.resize(NumNewVals); // shrinkify - // Okay, now insert the RHS live ranges into the LHS. + // Okay, now insert the RHS live segments into the LHS. 
LiveRangeUpdater Updater(this); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) Updater.add(*I); } -/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live -/// interval as the specified value number. The LiveRanges in RHS are -/// allowed to overlap with LiveRanges in the current interval, but only if -/// the overlapping LiveRanges have the specified value number. -void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, - VNInfo *LHSValNo) { +/// Merge all of the segments in RHS into this live range as the specified +/// value number. The segments in RHS are allowed to overlap with segments in +/// the current range, but only if the overlapping segments have the +/// specified value number. +void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) Updater.add(I->start, I->end, LHSValNo); } -/// MergeValueInAsValue - Merge all of the live ranges of a specific val# -/// in RHS into this live interval as the specified value number. -/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the -/// current interval, it will replace the value numbers of the overlaped -/// live ranges with the specified value number. -void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, - const VNInfo *RHSValNo, - VNInfo *LHSValNo) { +/// MergeValueInAsValue - Merge all of the live segments of a specific val# +/// in RHS into this live range as the specified value number. +/// The segments in RHS are allowed to overlap with segments in the +/// current range, it will replace the value numbers of the overlapped +/// segments with the specified value number. +void LiveRange::MergeValueInAsValue(const LiveRange &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) if (I->valno == RHSValNo) @@ -520,9 +513,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, /// MergeValueNumberInto - This method is called when two value numbers /// are found to be equivalent. This eliminates V1, replacing all -/// LiveRanges with the V1 value number with the V2 value number. This can +/// segments with the V1 value number with the V2 value number. This can /// cause merging of V1/V2 value numbers and compaction of the value space. -VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { +VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { assert(V1 != V2 && "Identical value#'s are always equivalent!"); // This code actually merges the (numerically) larger value number into the @@ -536,37 +529,37 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { std::swap(V1, V2); } - // Merge V1 live ranges into V2. + // Merge V1 segments into V2. for (iterator I = begin(); I != end(); ) { - iterator LR = I++; - if (LR->valno != V1) continue; // Not a V1 LiveRange. + iterator S = I++; + if (S->valno != V1) continue; // Not a V1 Segment. // Okay, we found a V1 live range. If it had a previous, touching, V2 live // range, extend it. - if (LR != begin()) { - iterator Prev = LR-1; - if (Prev->valno == V2 && Prev->end == LR->start) { - Prev->end = LR->end; + if (S != begin()) { + iterator Prev = S-1; + if (Prev->valno == V2 && Prev->end == S->start) { + Prev->end = S->end; // Erase this live-range.
- ranges.erase(LR); + segments.erase(S); I = Prev+1; - LR = Prev; + S = Prev; } } // Okay, now we have a V1 or V2 live range that is maximally merged forward. // Ensure that it is a V2 live-range. - LR->valno = V2; + S->valno = V2; - // If we can merge it into later V2 live ranges, do so now. We ignore any - // following V1 live ranges, as they will be merged in subsequent iterations + // If we can merge it into later V2 segments, do so now. We ignore any + // following V1 segments, as they will be merged in subsequent iterations // of the loop. if (I != end()) { - if (I->start == LR->end && I->valno == V2) { - LR->end = I->end; - ranges.erase(I); - I = LR+1; + if (I->start == S->end && I->valno == V2) { + S->end = I->end; + segments.erase(I); + I = S+1; } } } @@ -584,22 +577,21 @@ unsigned LiveInterval::getSize() const { return Sum; } -raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { - return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { + return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void LiveRange::dump() const { +void LiveRange::Segment::dump() const { dbgs() << *this << "\n"; } #endif -void LiveInterval::print(raw_ostream &OS) const { +void LiveRange::print(raw_ostream &OS) const { if (empty()) OS << "EMPTY"; else { - for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { OS << *I; assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); } @@ -625,19 +617,29 @@ void LiveInterval::print(raw_ostream &OS) const { } } +void LiveInterval::print(raw_ostream &OS) const { + OS << PrintReg(reg) << ' '; + super::print(OS); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::dump() const { + dbgs() << *this << "\n"; +} + void LiveInterval::dump() const { dbgs() << *this << "\n"; } #endif #ifndef NDEBUG -void LiveInterval::verify() const { +void LiveRange::verify() const { for (const_iterator I = begin(), E = end(); I != E; ++I) { assert(I->start.isValid()); assert(I->end.isValid()); assert(I->start < I->end); assert(I->valno != 0); + assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); if (llvm::next(I) != E) { assert(I->end <= llvm::next(I)->start); @@ -649,10 +651,6 @@ void LiveInterval::verify() const { #endif -void LiveRange::print(raw_ostream &os) const { - os << *this; -} - //===----------------------------------------------------------------------===// // LiveRangeUpdater class //===----------------------------------------------------------------------===// @@ -665,11 +663,11 @@ void LiveRange::print(raw_ostream &os) const { // // Otherwise, segments are kept in three separate areas: // -// 1. [begin; WriteI) at the front of LI. -// 2. [ReadI; end) at the back of LI. +// 1. [begin; WriteI) at the front of LR. +// 2. [ReadI; end) at the back of LR. // 3. Spills. // -// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - LR.begin() <= WriteI <= ReadI <= LR.end(). // - Segments in all three areas are fully ordered and coalesced. // - Segments in area 1 precede and can't coalesce with segments in area 2. // - Segments in Spills precede and can't coalesce with segments in area 2. 
@@ -684,23 +682,23 @@ void LiveRange::print(raw_ostream &os) const { void LiveRangeUpdater::print(raw_ostream &OS) const { if (!isDirty()) { - if (LI) - OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + if (LR) + OS << "Clean updater: " << *LR << '\n'; else OS << "Null updater.\n"; return; } - assert(LI && "Can't have null LI in dirty updater."); - OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + assert(LR && "Can't have null LR in dirty updater."); + OS << " updater with gap = " << (ReadI - WriteI) << ", last start = " << LastStart << ":\n Area 1:"; - for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I) OS << ' ' << *I; OS << "\n Spills:"; for (unsigned I = 0, E = Spills.size(); I != E; ++I) OS << ' ' << Spills[I]; OS << "\n Area 2:"; - for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I) OS << ' ' << *I; OS << '\n'; } @@ -711,8 +709,9 @@ void LiveRangeUpdater::dump() const } // Determine if A and B should be coalesced. -static inline bool coalescable(const LiveRange &A, const LiveRange &B) { - assert(A.start <= B.start && "Unordered live ranges."); +static inline bool coalescable(const LiveRange::Segment &A, + const LiveRange::Segment &B) { + assert(A.start <= B.start && "Unordered live segments."); if (A.end == B.start) return A.valno == B.valno; if (A.end < B.start) @@ -721,8 +720,8 @@ static inline bool coalescable(const LiveRange &A, const LiveRange &B) { return true; } -void LiveRangeUpdater::add(LiveRange Seg) { - assert(LI && "Cannot add to a null destination"); +void LiveRangeUpdater::add(LiveRange::Segment Seg) { + assert(LR && "Cannot add to a null destination"); // Flush the state if Start moves backwards. if (!LastStart.isValid() || LastStart > Seg.start) { @@ -730,21 +729,21 @@ void LiveRangeUpdater::add(LiveRange Seg) { flush(); // This brings us to an uninitialized state. Reinitialize. assert(Spills.empty() && "Leftover spilled segments"); - WriteI = ReadI = LI->begin(); + WriteI = ReadI = LR->begin(); } // Remember start for next time. LastStart = Seg.start; // Advance ReadI until it ends after Seg.start. - LiveInterval::iterator E = LI->end(); + LiveRange::iterator E = LR->end(); if (ReadI != E && ReadI->end <= Seg.start) { // First try to close the gap between WriteI and ReadI with spills. if (ReadI != WriteI) mergeSpills(); // Then advance ReadI. if (ReadI == WriteI) - ReadI = WriteI = LI->find(Seg.start); + ReadI = WriteI = LR->find(Seg.start); else while (ReadI != E && ReadI->end <= Seg.start) *WriteI++ = *ReadI++; @@ -777,7 +776,7 @@ void LiveRangeUpdater::add(LiveRange Seg) { } // Try coalescing Seg into WriteI[-1]. - if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) { WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); return; } @@ -788,10 +787,10 @@ void LiveRangeUpdater::add(LiveRange Seg) { return; } - // Finally, append to LI or Spills. + // Finally, append to LR or Spills. if (WriteI == E) { - LI->ranges.push_back(Seg); - WriteI = ReadI = LI->ranges.end(); + LR->segments.push_back(Seg); + WriteI = ReadI = LR->end(); } else Spills.push_back(Seg); } @@ -802,10 +801,10 @@ void LiveRangeUpdater::mergeSpills() { // Perform a backwards merge of Spills and [SpillI;WriteI). 
size_t GapSize = ReadI - WriteI; size_t NumMoved = std::min(Spills.size(), GapSize); - LiveInterval::iterator Src = WriteI; - LiveInterval::iterator Dst = Src + NumMoved; - LiveInterval::iterator SpillSrc = Spills.end(); - LiveInterval::iterator B = LI->begin(); + LiveRange::iterator Src = WriteI; + LiveRange::iterator Dst = Src + NumMoved; + LiveRange::iterator SpillSrc = Spills.end(); + LiveRange::iterator B = LR->begin(); // This is the new WriteI position after merging spills. WriteI = Dst; @@ -827,12 +826,12 @@ void LiveRangeUpdater::flush() { // Clear the dirty state. LastStart = SlotIndex(); - assert(LI && "Cannot add to a null destination"); + assert(LR && "Cannot add to a null destination"); // Nothing to merge? if (Spills.empty()) { - LI->ranges.erase(WriteI, ReadI); - LI->verify(); + LR->segments.erase(WriteI, ReadI); + LR->verify(); return; } @@ -840,17 +839,17 @@ void LiveRangeUpdater::flush() { size_t GapSize = ReadI - WriteI; if (GapSize < Spills.size()) { // The gap is too small. Make some room. - size_t WritePos = WriteI - LI->begin(); - LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + size_t WritePos = WriteI - LR->begin(); + LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment()); // This also invalidated ReadI, but it is recomputed below. - WriteI = LI->ranges.begin() + WritePos; + WriteI = LR->begin() + WritePos; } else { // Shrink the gap if necessary. - LI->ranges.erase(WriteI + Spills.size(), ReadI); + LR->segments.erase(WriteI + Spills.size(), ReadI); } ReadI = WriteI + Spills.size(); mergeSpills(); - LI->verify(); + LR->verify(); } unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { @@ -918,7 +917,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], Idx = LIS.getSlotIndexes()->getIndexBefore(MI); else Idx = LIS.getInstructionIndex(MI); - LiveRangeQuery LRQ(LI, Idx); + LiveQueryResult LRQ = LI.Query(Idx); const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); // In the case of an <undef> use that isn't tied to any def, VNI will be // NULL. If the use is tied to a def, VNI will be the defined value. @@ -935,11 +934,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); - LIV[eq]->ranges.push_back(*I); + LIV[eq]->segments.push_back(*I); } else *J++ = *I; } - LI.ranges.erase(J, E); + LI.segments.erase(J, E); // Transfer VNInfos to their new owners and renumber them. unsigned j = 0, e = LI.getNumValNums(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3680943..e1c3217 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -95,15 +95,15 @@ void LiveIntervals::releaseMemory() { RegMaskBits.clear(); RegMaskBlocks.clear(); - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - delete RegUnitIntervals[i]; - RegUnitIntervals.clear(); + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + delete RegUnitRanges[i]; + RegUnitRanges.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. 
VNInfoAllocator.Reset(); } -/// runOnMachineFunction - Register allocate the whole function +/// runOnMachineFunction - calculates LiveIntervals /// bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MF = &fn; @@ -139,15 +139,15 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; // Dump the regunits. - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - if (LiveInterval *LI = RegUnitIntervals[i]) - OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n'; + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + if (LiveRange *LR = RegUnitRanges[i]) + OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n'; // Dump the virtregs. for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (hasInterval(Reg)) - OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; + OS << getInterval(Reg) << '\n'; } OS << "RegMasks:"; @@ -170,16 +170,17 @@ void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? + llvm::huge_valf : 0.0F; return new LiveInterval(reg, Weight); } /// computeVirtRegInterval - Compute the live interval of a virtual register, /// based on defs and uses. -void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { +void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); - assert(LI->empty() && "Should only compute empty intervals."); + assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); @@ -190,9 +191,7 @@ void LiveIntervals::computeVirtRegs() { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = createInterval(Reg); - VirtRegIntervals[Reg] = LI; - computeVirtRegInterval(LI); + createAndComputeVirtRegInterval(Reg); } } @@ -229,12 +228,10 @@ void LiveIntervals::computeRegMasks() { // interference. // -/// computeRegUnitInterval - Compute the live interval of a register unit, based -/// on the uses and defs of aliasing registers. The interval should be empty, +/// computeRegUnitInterval - Compute the live range of a register unit, based +/// on the uses and defs of aliasing registers. The range should be empty, /// or contain only dead phi-defs from ABI blocks. -void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { - unsigned Unit = LI->reg; - +void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); @@ -247,18 +244,18 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); Supers.isValid(); ++Supers) { if (!MRI->reg_empty(*Supers)) - LRCalc->createDeadDefs(LI, *Supers); + LRCalc->createDeadDefs(LR, *Supers); } } - // Now extend LI to reach all uses. + // Now extend LR to reach all uses. // Ignore uses of reserved registers. We only track defs of those. 
for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); Supers.isValid(); ++Supers) { unsigned Reg = *Supers; if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) - LRCalc->extendToUses(LI, Reg); + LRCalc->extendToUses(LR, Reg); } } } @@ -269,11 +266,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { /// without a corresponding def when entering the entry block or a landing pad. /// void LiveIntervals::computeLiveInRegUnits() { - RegUnitIntervals.resize(TRI->getNumRegUnits()); + RegUnitRanges.resize(TRI->getNumRegUnits()); DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); - // Keep track of the intervals allocated. - SmallVector<LiveInterval*, 8> NewIntvs; + // Keep track of the live range sets allocated. + SmallVector<unsigned, 8> NewRanges; // Check all basic blocks for live-ins. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); @@ -291,23 +288,25 @@ void LiveIntervals::computeLiveInRegUnits() { LIE = MBB->livein_end(); LII != LIE; ++LII) { for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; - LiveInterval *Intv = RegUnitIntervals[Unit]; - if (!Intv) { - Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); - NewIntvs.push_back(Intv); + LiveRange *LR = RegUnitRanges[Unit]; + if (!LR) { + LR = RegUnitRanges[Unit] = new LiveRange(); + NewRanges.push_back(Unit); } - VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); (void)VNI; DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); } } DEBUG(dbgs() << '\n'); } - DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); - // Compute the 'normal' part of the intervals. - for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) - computeRegUnitInterval(NewIntvs[i]); + // Compute the 'normal' part of the ranges. + for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) { + unsigned Unit = NewRanges[i]; + computeRegUnitRange(*RegUnitRanges[Unit], Unit); + } } @@ -331,7 +330,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - LiveRangeQuery LRQ(*li, Idx); + LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is @@ -350,14 +349,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, WorkList.push_back(std::make_pair(Idx, VNI)); } - // Create a new live interval with only minimal live segments per def. - LiveInterval NewLI(li->reg, 0); + // Create new live ranges with only minimal live segments per def. + LiveRange NewLR; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); + NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -372,7 +371,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. 
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -393,7 +392,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); + NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), @@ -414,14 +413,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); - assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getDeadSlot()) + LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); + assert(LRI != NewLR.end() && "Missing segment for PHI"); + if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - NewLI.removeRange(*LII); + NewLR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; } else { @@ -436,23 +435,23 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } - // Move the trimmed ranges back. - li->ranges.swap(NewLI.ranges); + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; } -void LiveIntervals::extendToIndices(LiveInterval *LI, +void LiveIntervals::extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (unsigned i = 0, e = Indices.size(); i != e; ++i) - LRCalc->extend(LI, Indices[i]); + LRCalc->extend(LR, Indices[i]); } void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { - LiveRangeQuery LRQ(*LI, Kill); + LiveQueryResult LRQ = LI->Query(Kill); VNInfo *VNI = LRQ.valueOut(); if (!VNI) return; @@ -463,13 +462,13 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(Kill, LRQ.endPoint()); + LI->removeSegment(Kill, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); return; } // VNI is live out of KillMBB. - LI->removeRange(Kill, MBBEnd); + LI->removeSegment(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); // Find all blocks that are reachable from KillMBB without leaving VNI's live @@ -487,23 +486,23 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // Check if VNI is live in to MBB. tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveRangeQuery LRQ(*LI, MBBStart); + LiveQueryResult LRQ = LI->Query(MBBStart); if (LRQ.valueIn() != VNI) { - // This block isn't part of the VNI live range. Prune the search. + // This block isn't part of the VNI segment. Prune the search. I.skipChildren(); continue; } // Prune the search if VNI is killed in MBB. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(MBBStart, LRQ.endPoint()); + LI->removeSegment(MBBStart, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); I.skipChildren(); continue; } // VNI is live through MBB. 
- LI->removeRange(MBBStart, MBBEnd); + LI->removeSegment(MBBStart, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); ++I; } @@ -516,7 +515,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. - SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU; + SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); @@ -531,13 +530,14 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.clear(); for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); ++Units) { - LiveInterval *RUInt = &getRegUnit(*Units); - if (RUInt->empty()) + LiveRange &RURanges = getRegUnit(*Units); + if (RURanges.empty()) continue; - RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end))); } - // Every instruction that kills Reg corresponds to a live range end point. + // Every instruction that kills Reg corresponds to a segment range end + // point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { // A block index indicates an MBB edge. @@ -547,7 +547,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (!MI) continue; - // Check if any of the reguints are live beyond the end of RI. That could + // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // // %EAX = COPY %vreg5 @@ -557,12 +557,12 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. bool CancelKill = false; for (unsigned u = 0, e = RU.size(); u != e; ++u) { - LiveInterval *RInt = RU[u].first; - LiveInterval::iterator &I = RU[u].second; - if (I == RInt->end()) + LiveRange &RRanges = *RU[u].first; + LiveRange::iterator &I = RU[u].second; + if (I == RRanges.end()) continue; - I = RInt->advanceTo(I, RI->end); - if (I == RInt->end() || I->start >= RI->end) + I = RRanges.advanceTo(I, RI->end); + if (I == RRanges.end() || I->start >= RI->end) continue; // I is overlapping RI. CancelKill = true; @@ -625,18 +625,18 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { return (isDef + isUse) * (freq.getFrequency() * Scale); } -LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { - LiveInterval& Interval = getOrCreateInterval(reg); +LiveRange::Segment +LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) { + LiveInterval& Interval = createEmptyInterval(reg); VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - LiveRange LR( + LiveRange::Segment S( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); - Interval.addRange(LR); + Interval.addSegment(S); - return LR; + return S; } @@ -711,7 +711,7 @@ private: const TargetRegisterInfo& TRI; SlotIndex OldIdx; SlotIndex NewIdx; - SmallPtrSet<LiveInterval*, 8> Updated; + SmallPtrSet<LiveRange*, 8> Updated; bool UpdateFlags; public: @@ -725,7 +725,7 @@ public: // physregs, even those that aren't needed for regalloc, in order to update // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill // flags, and postRA passes will use a live register utility instead. 
- LiveInterval *getRegUnitLI(unsigned Unit) { + LiveRange *getRegUnitLI(unsigned Unit) { if (UpdateFlags) return &LIS.getRegUnit(Unit); return LIS.getCachedRegUnit(Unit); @@ -750,15 +750,16 @@ public: if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { - updateRange(LIS.getInterval(Reg)); + LiveInterval &LI = LIS.getInterval(Reg); + updateRange(LI, Reg); continue; } // For physregs, only update the regunits that actually have a // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = getRegUnitLI(*Units)) - updateRange(*LI); + if (LiveRange *LR = getRegUnitLI(*Units)) + updateRange(*LR, *Units); } if (hasRegMask) updateRegMaskSlots(); @@ -767,26 +768,26 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveInterval &LI) { - if (!Updated.insert(&LI)) + void updateRange(LiveRange &LR, unsigned Reg) { + if (!Updated.insert(&LR)) return; DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - dbgs() << PrintReg(LI.reg); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + dbgs() << PrintReg(Reg); else - dbgs() << PrintRegUnit(LI.reg, &TRI); - dbgs() << ":\t" << LI << '\n'; + dbgs() << PrintRegUnit(Reg, &TRI); + dbgs() << ":\t" << LR << '\n'; }); if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) - handleMoveDown(LI); + handleMoveDown(LR); else - handleMoveUp(LI); - DEBUG(dbgs() << " -->\t" << LI << '\n'); - LI.verify(); + handleMoveUp(LR, Reg); + DEBUG(dbgs() << " -->\t" << LR << '\n'); + LR.verify(); } - /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// Update LR to reflect an instruction has been moved downwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -800,17 +801,17 @@ private: /// Move def to NewIdx, possibly across another live value. /// /// 4. Def at OldIdx AND at NewIdx: - /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. /// (Happens when bundling multiple defs together). /// /// 5. Value read at OldIdx, killed before NewIdx: /// Extend kill to NewIdx. /// - void handleMoveDown(LiveInterval &LI) { + void handleMoveDown(LiveRange &LR) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -827,7 +828,7 @@ private: for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) if (MO->isReg() && MO->isUse()) MO->setIsKill(false); - // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by // overlapping ranges. Case 5 above. I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); // If this was a kill, there may also be a def. Otherwise we're done. @@ -856,24 +857,25 @@ private: assert((I->end == OldIdx.getDeadSlot() || SlotIndex::isSameInstr(I->end, NewIdx)) && "Cannot move def below kill"); - LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot()); if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { // There is an existing def at NewIdx, case 4 above. 
// The def at OldIdx is coalesced into that value. assert(NewI->valno != DefVNI && "Multiple defs of value?"); - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. - // If the def at OldIdx was dead, we allow it to be moved across other LI + // If the def at OldIdx was dead, we allow it to be moved across other LR // values. The new range should be placed immediately before NewI, move any // intermediate ranges up. assert(NewI != I && "Inconsistent iterators"); std::copy(llvm::next(I), NewI, I); - *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *llvm::prior(NewI) + = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// Update LR to reflect an instruction has been moved upwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -893,11 +895,11 @@ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveInterval &LI) { + void handleMoveUp(LiveRange &LR, unsigned Reg) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -914,7 +916,7 @@ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. - llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot(); return; } } @@ -926,18 +928,18 @@ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); // Check for an existing def at NewIdx. - LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { assert(NewI->valno != DefVNI && "Same value defined more than once?"); // There is an existing def at NewIdx. if (I->end.isDead()) { // Case 3: Remove the dead def at OldIdx. - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // Case 4: Replace def at NewIdx with live def at OldIdx. I->start = DefVNI->def; - LI.removeValNo(NewI->valno); + LR.removeValNo(NewI->valno); return; } @@ -948,10 +950,10 @@ return; } - // DefVNI is a dead def. It may have been moved across other values in LI, + // DefVNI is a dead def. It may have been moved across other values in LR, // so move I up to NewI. Slide [NewI;I) down one position.
std::copy_backward(NewI, I, llvm::next(I)); - *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } void updateRegMaskSlots() { @@ -1074,8 +1076,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (MOI->isReg() && TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { - LiveInterval &LI = getOrCreateInterval(MOI->getReg()); - computeVirtRegInterval(&LI); + createAndComputeVirtRegInterval(MOI->getReg()); } } } @@ -1122,9 +1123,9 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (LII != LI.begin()) prevStart = llvm::prior(LII)->start; - // FIXME: This could be more efficient if there was a removeRange - // method that returned an iterator. - LI.removeRange(*LII, true); + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LI.removeSegment(*LII, true); if (prevStart.isValid()) LII = LI.find(prevStart); else @@ -1143,13 +1144,14 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!lastUseIdx.isValid()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LI.addSegment(S); } else if (LII->start != instrIdx.getRegSlot()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addSegment(S); } if (MO.getSubReg() && !MO.isUndef()) diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index dede490..ae086bc 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -36,11 +36,11 @@ void LiveRangeCalc::reset(const MachineFunction *mf, } -void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all def operands. If the same instruction has multiple defs of Reg, - // LI->createDeadDef() will deduplicate. + // LR.createDeadDef() will deduplicate. for (MachineRegisterInfo::def_iterator I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -54,13 +54,13 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { Idx = Indexes->getInstructionIndex(MI) .getRegSlot(I.getOperand().isEarlyClobber()); - // Create the def in LI. This may find an existing def. - LI->createDeadDef(Idx, *Alloc); + // Create the def in LR. This may find an existing def. + LR.createDeadDef(Idx, *Alloc); } } -void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all operands that read Reg. This may include partial defs. 
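Taken together, the hunks above change every LiveRangeCalc entry point from LiveInterval* to LiveRange&, so regunit ranges and virtual-register intervals share one computation path. A condensed sketch of the resulting calling convention (the wrapper computeRange is invented for illustration; the three calls match computeVirtRegInterval earlier in this patch):

// Sketch: fill an empty LiveRange for Reg from its defs and uses.
static void computeRange(LiveRangeCalc &LRC, LiveRange &LR, unsigned Reg,
                         const MachineFunction *MF, SlotIndexes *Indexes,
                         MachineDominatorTree *DomTree,
                         VNInfo::Allocator *Alloc) {
  assert(LR.empty() && "should only compute empty ranges");
  LRC.reset(MF, Indexes, DomTree, Alloc);
  LRC.createDeadDefs(LR, Reg); // one dead def per defining instruction
  LRC.extendToUses(LR, Reg);   // then grow each def to reach its uses
}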
@@ -99,7 +99,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { Idx = Idx.getRegSlot(true); } } - extend(LI, Idx, Reg); + extend(LR, Idx, Reg); } } @@ -125,17 +125,14 @@ void LiveRangeCalc::updateLiveIns() { assert(Seen.test(MBB->getNumber())); LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } - Updater.setDest(I->LI); + Updater.setDest(&I->LR); Updater.add(Start, End, I->Value); } LiveIn.clear(); } -void LiveRangeCalc::extend(LiveInterval *LI, - SlotIndex Kill, - unsigned PhysReg) { - assert(LI && "Missing live range"); +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) { assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -144,14 +141,14 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? - if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) return; // Find the single reaching def, or determine if Kill is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + if (findReachingDefs(LR, *KillMBB, Kill, PhysReg)) return; // When there were multiple different values, we may need new PHIs. @@ -170,13 +167,11 @@ void LiveRangeCalc::calculateValues() { } -bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - unsigned KillMBBNum = KillMBB->getNumber(); +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg) { + unsigned KillMBBNum = KillMBB.getNumber(); - // Block numbers where LI should be live-in. + // Block numbers where LR should be live-in. SmallVector<unsigned, 16> WorkList(1, KillMBBNum); // Remember if we have seen more than one value. @@ -203,7 +198,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, #endif for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { + PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; // Is this a known live-out block? @@ -221,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. - VNInfo *VNI = LI->extendInBlock(Start, End); + VNInfo *VNI = LR.extendInBlock(Start, End); setLiveOutValue(Pred, VNI); if (VNI) { if (TheVNI && TheVNI != VNI) @@ -231,7 +226,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, } // No, we need a live-in value for Pred as well - if (Pred != KillMBB) + if (Pred != &KillMBB) WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. @@ -248,9 +243,9 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // If a unique reaching def was found, blit in the live ranges immediately. 
if (UniqueVNI) { - LiveRangeUpdater Updater(LI); - for (SmallVectorImpl<unsigned>::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + LiveRangeUpdater Updater(&LR); + for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(*I); // Trim the live range in KillMBB. @@ -270,8 +265,8 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); - addLiveInBlock(LI, DomTree->getNode(MBB)); - if (MBB == KillMBB) + addLiveInBlock(LR, DomTree->getNode(MBB)); + if (MBB == &KillMBB) LiveIn.back().Kill = Kill; } @@ -348,16 +343,17 @@ void LiveRangeCalc::updateSSA() { assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); + LiveRange &LR = I->LR; + VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; // Add liveness since updateLiveIns now skips this node. if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI)); else { - I->LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 57cab7b..a3a3fbb 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -75,9 +75,9 @@ class LiveRangeCalc { /// LiveInBlock - Information about a basic block where a live range is known /// to be live-in, but the value has not yet been determined. struct LiveInBlock { - // LI - The live range that is live-in to this block. The algorithms can + // The live range set that is live-in to this block. The algorithms can // handle multiple non-overlapping live ranges simultaneously. - LiveInterval *LI; + LiveRange &LR; // DomNode - Dominator tree node for the block. // Cleared when the final value has been determined and LI has been updated. @@ -91,8 +91,8 @@ class LiveRangeCalc { // Live-in value filled in by updateSSA once it is known. VNInfo *Value; - LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill) - : LI(li), DomNode(node), Kill(kill), Value(0) {} + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill), Value(0) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -111,10 +111,8 @@ class LiveRangeCalc { /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -146,10 +144,6 @@ public: MachineDominatorTree*, VNInfo::Allocator*); - /// calculate - Calculate the live range of a virtual register from its defs - /// and uses. LI must be empty with no values. 
- void calculate(LiveInterval *LI); - //===--------------------------------------------------------------------===// // Mid-level interface. //===--------------------------------------------------------------------===// @@ -165,27 +159,27 @@ public: /// single existing value, Alloc may be null. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding /// minimal live range. - void createDeadDefs(LiveInterval *LI, unsigned Reg); + void createDeadDefs(LiveRange &LR, unsigned Reg); /// createDeadDefs - Create a dead def in LI for every def of LI->reg. - void createDeadDefs(LiveInterval *LI) { - createDeadDefs(LI, LI->reg); + void createDeadDefs(LiveInterval &LI) { + createDeadDefs(LI, LI.reg); } /// extendToUses - Extend the live range of LI to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveInterval *LI, unsigned Reg); + void extendToUses(LiveRange &LR, unsigned Reg); /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. - void extendToUses(LiveInterval *LI) { - extendToUses(LI, LI->reg); + void extendToUses(LiveInterval &LI) { + extendToUses(LI, LI.reg); } //===--------------------------------------------------------------------===// @@ -216,15 +210,15 @@ public: /// function can only be called once per basic block. Once the live-in value /// has been determined, calculateValues() will add liveness to LI. /// - /// @param LI The live range that is live-in to the block. + /// @param LR The live range that is live-in to the block. /// @param DomNode The domtree node for the block. /// @param Kill Index in block where LI is killed. If the value is /// live-through, set Kill = SlotIndex() and also call /// setLiveOutValue(MBB, 0).
- void addLiveInBlock(LiveInterval *LI, + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, SlotIndex Kill = SlotIndex()) { - LiveIn.push_back(LiveInBlock(LI, DomNode, Kill)); + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); } /// calculateValues - Calculate the value that will be live-in to each block diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 792ef54..cb70c43 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -30,17 +30,23 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { - VRM->grow(); VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } - LiveInterval &LI = LIS.getOrCreateInterval(VReg); - NewRegs.push_back(&LI); + LiveInterval &LI = LIS.createEmptyInterval(VReg); return LI; } +unsigned LiveRangeEdit::createFrom(unsigned OldReg) { + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + if (VRM) { + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } + return VReg; +} + bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, AliasAnalysis *aa) { @@ -256,9 +262,9 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { else if (MOI->isDef()) { for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); Units.isValid(); ++Units) { - if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) { - if (VNInfo *VNI = LI->getVNInfoAt(Idx)) - LI->removeValNo(VNI); + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); } } } @@ -272,7 +278,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Always shrink COPY uses that probably come from live range splitting. if (MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.killedAt(Idx))) + LI.Query(Idx).isKill())) ToShrink.insert(&LI); // Remove defined value. @@ -360,7 +366,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (BeingSpilled) continue; // LI may have been separated, create new intervals. - LI->RenumberValues(LIS); + LI->RenumberValues(); ConnectedVNInfoEqClasses ConEQ(LIS); unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) @@ -370,7 +376,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg)); + Dups.push_back(&createEmptyIntervalFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. @@ -387,16 +393,27 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } +// Keep track of new virtual registers created via +// MachineRegisterInfo::createVirtualRegister. 
+void +LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) +{ + if (VRM) + VRM->grow(); + + NewRegs.push_back(VReg); +} + void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); - for (iterator I = begin(), E = end(); I != E; ++I) { - LiveInterval &LI = **I; + for (unsigned I = 0, Size = size(); I < Size; ++I) { + LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " << MRI.getRegClass(LI.reg)->getName() << '\n'); - VRAI.CalculateWeightAndHint(LI); + VRAI.calculateSpillWeightAndHint(LI); } } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 0ef069f..1d801ac 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -119,9 +119,11 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveRange &UnitRange = LIS->getRegUnit(*Units); + if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes())) return true; + } return false; } diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp new file mode 100644 index 0000000..6221ca2 --- /dev/null +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -0,0 +1,111 @@ +//===-- LiveRegUnits.cpp - Register Unit Set -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveRegUnits utility for tracking liveness of +// physical register units across machine instructions in forward or backward +// order. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +using namespace llvm; + +/// Return true if the given MachineOperand clobbers the given register unit. +/// A register unit is only clobbered if all its super-registers are clobbered. +static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit, + const MCRegisterInfo *MCRI) { + for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) { + if (!MO->clobbersPhysReg(*SI)) + return false; + } + } + return true; +} + +/// We assume the high bits of a physical super register are not preserved +/// unless the instruction has an implicit-use operand reading the +/// super-register or a register unit for the upper bits is available. +void LiveRegUnits::removeRegsInMask(const MachineOperand &Op, + const MCRegisterInfo &MCRI) { + SparseSet<unsigned>::iterator LUI = LiveUnits.begin(); + while (LUI != LiveUnits.end()) { + if (operClobbersUnit(&Op, *LUI, &MCRI)) + LUI = LiveUnits.erase(LUI); + else + ++LUI; + } +} + +void LiveRegUnits::stepBackward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + // Remove defined registers and regmask kills from the set.
+ for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg, MCRI); + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg, MCRI); + } +} + +/// Uses with kill flag get removed from the set, defs added. If possible +/// use stepBackward() instead of this function because some kill flags may +/// be missing. +void LiveRegUnits::stepForward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + SmallVector<unsigned, 4> Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg, MCRI); + } + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + addReg(Defs[i], MCRI); + } +} + +/// Adds all registers in the live-in list of block @p MBB. +void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB, + const MCRegisterInfo &MCRI) { + for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(), + LE = MBB->livein_end(); L != LE; ++L) { + addReg(*L, MCRI); + } +} diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 5633271..ca71e3b 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -861,7 +861,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LiveInterval &LI = LIS->getInterval(Reg); VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "PHI sources should be live out of their predecessors."); - LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } } } @@ -880,9 +880,9 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (isLiveOut && isLastMBB) { VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "LiveInterval should have VNInfo where it is live."); - LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } else if (!isLiveOut && !isLastMBB) { - LI.removeRange(StartIndex, EndIndex); + LI.removeSegment(StartIndex, EndIndex); } } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 06bb80a..295b450 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -647,12 +647,15 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } +#ifndef NDEBUG + bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; // OpNo now points at the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands.
assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands()) && + OpNo < MCID->getNumOperands() || isMetaDataOp) && "Trying to add an operand to a machine instr that is already done!"); +#endif MachineRegisterInfo *MRI = getRegInfo(); @@ -1702,31 +1705,31 @@ void MachineInstr::clearRegisterKills(unsigned Reg, } } -bool MachineInstr::addRegisterDead(unsigned IncomingReg, +bool MachineInstr::addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && - MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; SmallVector<unsigned,4> DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned MOReg = MO.getReg(); + if (!MOReg) continue; - if (Reg == IncomingReg) { + if (MOReg == Reg) { MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + TargetRegisterInfo::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. - if (RegInfo->isSuperRegister(IncomingReg, Reg)) + if (RegInfo->isSuperRegister(Reg, MOReg)) return true; - if (RegInfo->isSubRegister(IncomingReg, Reg)) + if (RegInfo->isSubRegister(Reg, MOReg)) DeadOps.push_back(i); } } @@ -1746,7 +1749,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, if (Found || !AddIfNotFound) return Found; - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/, @@ -1754,21 +1757,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, return true; } -void MachineInstr::addRegisterDefined(unsigned IncomingReg, +void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { - MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo); if (MO) return; } else { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; } } - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/)); } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 6ad4e39..104eacd 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -468,12 +468,12 @@ void MachineLICM::ProcessMI(MachineInstr *MI, for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - if (PhysRegClobbers.test(*AS)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; PhysRegDefs.set(*AS); } + if (PhysRegClobbers.test(Reg)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. 
+ RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -502,7 +502,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. - const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; @@ -584,7 +584,7 @@ void MachineLICM::HoistRegionPostRA() { /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current /// loop, and make sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { - const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; if (!BB->isLiveIn(Reg)) diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 7f2c0ca..f8b8796 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,8 +19,11 @@ using namespace llvm; +// Pin the vtable to this file. +void MachineRegisterInfo::Delegate::anchor() {} + MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) - : TM(TM), IsSSA(true), TracksLiveness(true) { + : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); @@ -108,6 +111,8 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); + if (TheDelegate) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a6c5a9f..e71c4df 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -53,6 +53,12 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG +static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, + cl::desc("Enable register pressure scheduling."), cl::init(true)); + +static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, + cl::desc("Enable cyclic critical path analysis."), cl::init(true)); + static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -66,6 +72,10 @@ static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; +// Pin the vtables to this file. +void MachineSchedStrategy::anchor() {} +void ScheduleDAGMutation::anchor() {} + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -95,6 +105,9 @@ public: virtual void print(raw_ostream &O, const Module* = 0) const; static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); }; } // namespace @@ -149,12 +162,13 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. 
/// This will be used as the default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. -static MachineBasicBlock::iterator -priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { +static MachineBasicBlock::const_iterator +priorNonDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator Beg) { assert(I != Beg && "reached the top of the region, cannot decrement"); while (--I != Beg) { if (!I->isDebugValue()) @@ -163,10 +177,19 @@ priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator Beg) { + return const_cast<MachineInstr*>( + &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); +} + /// If this iterator is a debug value, increment until reaching the End or a /// non-debug instruction. -static MachineBasicBlock::iterator -nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { for(; I != End; ++I) { if (!I->isDebugValue()) break; @@ -174,6 +197,34 @@ nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, and finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast<MachineInstr*>( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. +ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor != useDefaultMachineSched) + return Ctor(this); + + // Get the default scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSched(this); +} + /// Top-level MachineScheduler pass driver. /// /// Visit blocks in function order. Divide each block into scheduling regions @@ -209,18 +260,9 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } RegClassInfo->runOnMachineFunction(*MF); - // Select the scheduler, or set the default. - MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; - if (Ctor == useDefaultMachineSched) { - // Get the default scheduler set by the target. - Ctor = MachineSchedRegistry::getDefault(); - if (!Ctor) { - Ctor = createConvergingSched; - MachineSchedRegistry::setDefault(Ctor); - } - } - // Instantiate the selected scheduler. - OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + // Instantiate the selected scheduler for this target, function, and + // optimization level. + OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); // Visit all machine basic blocks.
// @@ -255,14 +297,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs) { + for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); + Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -277,7 +320,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingInstrs << "\n"); + dbgs() << " RegionInstrs: " << NumRegionInstrs + << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -446,13 +490,19 @@ bool ScheduleDAGMI::checkSchedLimit() { void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) + unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); + + SUPressureDiffs.clear(); + + SchedImpl->initPolicy(begin, end, regioninstrs); + + ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top scheduled top and bottom @@ -483,9 +533,16 @@ void ScheduleDAGMI::initRegPressure() { dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); }; + // For each live out vreg reduce the pressure change associated with other + // uses of the same vreg below the live-out reaching def. + updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); + // Account for liveness generated by the region boundary. - if (LiveRegionEnd != RegionEnd) - BotRPTracker.recede(); + if (LiveRegionEnd != RegionEnd) { + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + updatePressureDiffs(LiveUses); + } assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); @@ -500,34 +557,83 @@ void ScheduleDAGMI::initRegPressure() { DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit << " Actual " << RegionPressure[i] << "\n"); - RegionCriticalPSets.push_back(PressureElement(i, 0)); + RegionCriticalPSets.push_back(PressureChange(i)); } } DEBUG(dbgs() << "Excess PSets: "; for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].PSetID) << " "; + RegionCriticalPSets[i].getPSet()) << " "; dbgs() << "\n"); } -// FIXME: When the pressure tracker deals in pressure differences then we won't -// iterate over all RegionCriticalPSets[i]. 
void ScheduleDAGMI:: -updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { - for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { - unsigned ID = RegionCriticalPSets[i].PSetID; - int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; - if ((int)NewMaxPressure[ID] > MaxUnits) - MaxUnits = NewMaxPressure[ID]; +updateScheduledPressure(const SUnit *SU, + const std::vector<unsigned> &NewMaxPressure) { + const PressureDiff &PDiff = getPressureDiff(SU); + unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); + for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); + I != E; ++I) { + if (!I->isValid()) + break; + unsigned ID = I->getPSet(); + while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) + ++CritIdx; + if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { + if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() + && NewMaxPressure[ID] <= INT16_MAX) + RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); + } + unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); + if (NewMaxPressure[ID] >= Limit - 2) { + DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " + << NewMaxPressure[ID] << " > " << Limit << "(+ " + << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + } } - DEBUG( - for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { - unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); - if (NewMaxPressure[i] > Limit ) { - dbgs() << " " << TRI->getRegPressureSetName(i) << ": " - << NewMaxPressure[i] << " > " << Limit << "\n"; +} + +/// Update the PressureDiff array for liveness after scheduling this +/// instruction. +void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { + for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { + /// FIXME: Currently assuming single-use physregs. + unsigned Reg = LiveUses[LUIdx]; + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + if (!TRI->isVirtualRegister(Reg)) + continue; + + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + SUnit *SU = UI->SU; + DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr()); + // If this use comes before the reaching def, it cannot be a last use, so + // decrease its pressure change. + if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) + getPressureDiff(SU).addPressureChange(Reg, true, &MRI); } - }); + } + } } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -585,6 +691,13 @@ void ScheduleDAGMI::schedule() { /// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() { + if (!ShouldTrackPressure) { + RPTracker.reset(); + RegionCriticalPSets.clear(); + buildSchedGraph(AA); + return; + } + // Initialize the register pressure tracker used by buildSchedGraph. RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, /*TrackUntiedDefs=*/true); @@ -594,7 +707,7 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { RPTracker.recede(); // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker); + buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -637,6 +750,91 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, ExitSU.biasCriticalPath(); } +/// Compute the max cyclic critical path through the DAG. The scheduling DAG +/// only provides the critical path for single-block loops. To handle loops that +/// span blocks, we could use the vreg path latencies provided by +/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently +/// available for use in the scheduler. +/// +/// The cyclic path estimation identifies a def-use pair that crosses the back +/// edge and considers the depth and height of the nodes. For example, consider +/// the following instruction sequence where each instruction has unit latency +/// and defines an eponymous virtual register: +/// +/// a->b(a,c)->c(b)->d(c)->exit +/// +/// The cyclic critical path is two cycles: b->c->b +/// The acyclic critical path is four cycles: a->b->c->d->exit +/// LiveOutHeight = height(c) = len(c->d->exit) = 2 +/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 +/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 +/// LiveInDepth = depth(b) = len(a->b) = 1 +/// +/// LiveOutDepth - LiveInDepth = 3 - 1 = 2 +/// LiveInHeight - LiveOutHeight = 4 - 2 = 2 +/// CyclicCriticalPath = min(2, 2) = 2 +/// (A standalone sketch of this arithmetic follows these scheduler hunks.) unsigned ScheduleDAGMI::computeCyclicCriticalPath() { + // This only applies to single-block loops. + if (!BB->isSuccessor(BB)) + return 0; + + unsigned MaxCyclicLatency = 0; + // Visit each live out vreg def to find def/use pairs that cross iterations. + ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs; + for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); + RI != RE; ++RI) { + unsigned Reg = *RI; + if (!TRI->isVirtualRegister(Reg)) + continue; + const LiveInterval &LI = LIS->getInterval(Reg); + const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + if (!DefVNI) + continue; + + MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); + const SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + + unsigned LiveOutHeight = DefSU->getHeight(); + unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; + // Visit all local users of the vreg def. + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + if (UI->SU == &ExitSU) + continue; + + // Only consider uses of the phi. + LiveQueryResult LRQ = + LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + if (!LRQ.valueIn()->isPHIDef()) + continue; + + // Assume that a path spanning two iterations is a cycle, which could + // overestimate in strange cases. This allows cyclic latency to be + // estimated as the minimum slack of the vreg's depth or height.
+ unsigned CyclicLatency = 0; + if (LiveOutDepth > UI->SU->getDepth()) + CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + + unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + if (LiveInHeight > LiveOutHeight) { + if (LiveInHeight - LiveOutHeight < CyclicLatency) + CyclicLatency = LiveInHeight - LiveOutHeight; + } + else + CyclicLatency = 0; + + DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" + << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + if (CyclicLatency > MaxCyclicLatency) + MaxCyclicLatency = CyclicLatency; + } + } + DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); + return MaxCyclicLatency; +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots) { @@ -664,11 +862,13 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, SchedImpl->registerRoots(); // Advance past initial DebugValues. - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - TopRPTracker.setPos(CurrentTop); - CurrentBottom = RegionEnd; + + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } } /// Move an instruction and update register pressure. @@ -685,10 +885,12 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { TopRPTracker.setPos(MI); } - // Update top scheduled pressure. - TopRPTracker.advance(); - assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); + } } else { assert(SU->isBottomReady() && "node still has unscheduled dependencies"); @@ -704,10 +906,14 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { moveInstruction(MI, CurrentBottom); CurrentBottom = MI; } - // Update bottom scheduled pressure. - BotRPTracker.recede(); - assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update bottom scheduled pressure. + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); + updatePressureDiffs(LiveUses); + } } } @@ -1113,13 +1319,13 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// -// ConvergingScheduler - Implementation of the generic MachineSchedStrategy. +// GenericScheduler - Implementation of the generic MachineSchedStrategy. //===----------------------------------------------------------------------===// namespace { -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { +class GenericScheduler : public MachineSchedStrategy { public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. 
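To make the arithmetic in the computeCyclicCriticalPath() comment above concrete, here is a minimal standalone C++ sketch, not part of the patch, that plugs in the depths and heights from that worked example (a->b(a,c)->c(b)->d(c)->exit, unit latency, with c as the live-out def and b as its cross-iteration user). The five input values are assumed from the comment rather than queried from a real DAG:

#include <algorithm>
#include <cstdio>

int main() {
  unsigned DefDepth = 2;   // depth(c)  = len(a->b->c)
  unsigned DefHeight = 2;  // height(c) = len(c->d->exit)
  unsigned DefLatency = 1; // unit latency throughout the example
  unsigned UseDepth = 1;   // depth(b)  = len(a->b)
  unsigned UseHeight = 3;  // height(b) = len(b->c->d->exit)

  unsigned LiveOutDepth = DefDepth + DefLatency;  // 3
  unsigned LiveOutHeight = DefHeight;             // 2
  unsigned LiveInHeight = UseHeight + DefLatency; // 4

  // Treat a path spanning two iterations as a cycle: the estimate is the
  // minimum slack of the vreg's depth or height, as in the patch.
  unsigned CyclicLatency = 0;
  if (LiveOutDepth > UseDepth)
    CyclicLatency = LiveOutDepth - UseDepth;      // 3 - 1 = 2
  if (LiveInHeight > LiveOutHeight)
    CyclicLatency = std::min(CyclicLatency, LiveInHeight - LiveOutHeight);
  else
    CyclicLatency = 0;

  std::printf("CyclicCriticalPath = %uc\n", CyclicLatency); // prints "2c"
  return 0;
}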
@@ -1129,7 +1335,7 @@ public: TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; #ifndef NDEBUG - static const char *getReasonStr(ConvergingScheduler::CandReason Reason); + static const char *getReasonStr(GenericScheduler::CandReason Reason); #endif /// Policy for scheduling the next instruction in the candidate's zone. @@ -1160,7 +1366,7 @@ public: } }; - /// Store the state used by ConvergingScheduler heuristics, required for the + /// Store the state used by GenericScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { CandPolicy Policy; @@ -1205,16 +1411,21 @@ public: struct SchedRemainder { // Critical path through the DAG in expected latency. unsigned CriticalPath; + unsigned CyclicCritPath; // Scaled count of micro-ops left to schedule. unsigned RemIssueCount; + bool IsAcyclicLatencyLimited; + // Unscheduled resources SmallVector<unsigned, 16> RemainingCounts; void reset() { CriticalPath = 0; + CyclicCritPath = 0; RemIssueCount = 0; + IsAcyclicLatencyLimited = false; RemainingCounts.clear(); } @@ -1288,13 +1499,16 @@ public: void reset() { // A new HazardRec is created for each DAG and owned by SchedBoundary. - delete HazardRec; - + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = 0; + } Available.clear(); Pending.clear(); CheckPending = false; NextSUs.clear(); - HazardRec = 0; CurrCycle = 0; CurrMOps = 0; MinReadyCycle = UINT_MAX; @@ -1316,7 +1530,7 @@ public: /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), HazardRec(0) { reset(); } @@ -1327,7 +1541,7 @@ public: SchedRemainder *rem); bool isTop() const { - return Available.getID() == ConvergingScheduler::TopQID; + return Available.getID() == GenericScheduler::TopQID; } #ifndef NDEBUG @@ -1399,6 +1613,7 @@ public: }; private: + const MachineSchedContext *Context; ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; @@ -1408,6 +1623,7 @@ private: SchedBoundary Top; SchedBoundary Bot; + MachineSchedPolicy RegionPolicy; public: /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) enum { @@ -1416,8 +1632,15 @@ public: LogMaxQID = 2 }; - ConvergingScheduler(): - DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + GenericScheduler(const MachineSchedContext *C): + Context(C), DAG(0), SchedModel(0), TRI(0), + Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs); + + bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } virtual void initialize(ScheduleDAGMI *dag); @@ -1432,6 +1655,8 @@ public: virtual void registerRoots(); protected: + void checkAcyclicLatency(); + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone, @@ -1452,7 +1677,7 @@ protected: }; } // namespace -void ConvergingScheduler::SchedRemainder:: +void GenericScheduler::SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); if (!SchedModel->hasInstrSchedModel()) @@ -1473,7 +1698,7 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { } } -void ConvergingScheduler::SchedBoundary:: +void 
GenericScheduler::SchedBoundary:: init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { reset(); DAG = dag; @@ -1483,7 +1708,49 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); } -void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const TargetMachine &TM = Context->MF->getTarget(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. + unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( + TM.getTargetLowering()->getRegClassFor(MVT::i32)); + + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + + // For generic targets, we default to bottom-up, because it's simpler and more + // compile-time optimizations have been implemented in that direction. + RegionPolicy.OnlyBottomUp = true; + + // Allow the subtarget to override default policy. + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + + // After subtarget overrides, apply command line options. + if (!EnableRegPressure) + RegionPolicy.ShouldTrackPressure = false; + + // Check whether -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + if (ForceBottomUp.getNumOccurrences() > 0) { + RegionPolicy.OnlyBottomUp = ForceBottomUp; + if (RegionPolicy.OnlyBottomUp) + RegionPolicy.OnlyTopDown = false; + } + if (ForceTopDown.getNumOccurrences() > 0) { + RegionPolicy.OnlyTopDown = ForceTopDown; + if (RegionPolicy.OnlyTopDown) + RegionPolicy.OnlyBottomUp = false; + } +} + +void GenericScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; @@ -1498,14 +1765,17 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); - Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } } -void ConvergingScheduler::releaseTopNode(SUnit *SU) { +void GenericScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; @@ -1524,7 +1794,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { Top.releaseNode(SU, SU->TopReadyCycle); } -void ConvergingScheduler::releaseBottomNode(SUnit *SU) { +void GenericScheduler::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; @@ -1545,8 +1815,46 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { Bot.releaseNode(SU, SU->BotReadyCycle); } -void ConvergingScheduler::registerRoots() { +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic +/// critical path by more cycles than it takes to drain the instruction buffer. +/// We estimate an upper bound on in-flight instructions as: +/// +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) +/// InFlightIterations = AcyclicPath / CyclesPerIteration +/// InFlightResources = InFlightIterations * LoopResources +/// (A numeric sketch of these formulas follows the candidate-selection hunks below.) +/// +/// TODO: Check execution resources in addition to IssueCount. +void GenericScheduler::checkAcyclicLatency() { + if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) + return; + + // Scaled number of cycles per loop iteration. + unsigned IterCount = + std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), + Rem.RemIssueCount); + // Scaled acyclic critical path. + unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); + // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop + unsigned InFlightCount = + (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; + unsigned BufferLimit = + SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); + + Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; + + DEBUG(dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) + dbgs() << " ACYCLIC LATENCY LIMIT\n"); +} + +void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them just in case. for (std::vector<SUnit*>::const_iterator I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { @@ -1554,6 +1862,11 @@ void ConvergingScheduler::registerRoots() { Rem.CriticalPath = (*I)->getDepth(); } DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } } /// Does this SU have a hazard within the current instruction group. @@ -1569,7 +1882,7 @@ void ConvergingScheduler::registerRoots() { /// can dispatch per cycle.
/// /// TODO: Also check whether the SU must start a new group. -bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { +bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; @@ -1583,7 +1896,7 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { } // Find the unscheduled node in ReadySUs with the highest latency. -unsigned ConvergingScheduler::SchedBoundary:: +unsigned GenericScheduler::SchedBoundary:: findMaxLatency(ArrayRef<SUnit*> ReadySUs) { SUnit *LateSU = 0; unsigned RemLatency = 0; @@ -1605,7 +1918,7 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) { // Count resources in this zone and the remaining unscheduled // instructions. Return the max count, scaled. Set OtherCritIdx to the critical // resource index, or zero if the zone is issue limited. -unsigned ConvergingScheduler::SchedBoundary:: +unsigned GenericScheduler::SchedBoundary:: getOtherResourceCount(unsigned &OtherCritIdx) { OtherCritIdx = 0; if (!SchedModel->hasInstrSchedModel()) @@ -1633,7 +1946,7 @@ getOtherResourceCount(unsigned &OtherCritIdx) { /// Set the CandPolicy for this zone given the current resources and latencies /// inside and outside the zone. -void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, +void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone) { // Now that potential stalls have been considered, apply preemptive heuristics // based on the total latency and resources inside and outside this @@ -1692,7 +2005,7 @@ void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, Policy.DemandResIdx = OtherCritIdx; } -void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, +void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -1710,7 +2023,7 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { +void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { if (SchedModel->getMicroOpBufferSize() == 0) { assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); if (MinReadyCycle > NextCycle) @@ -1748,7 +2061,7 @@ void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); } -void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, +void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { ExecutedResCounts[PIdx] += Count; if (ExecutedResCounts[PIdx] > MaxExecutedResCount) @@ -1762,7 +2075,7 @@ void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned ConvergingScheduler::SchedBoundary:: +unsigned GenericScheduler::SchedBoundary:: countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; @@ -1787,7 +2100,7 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { } /// Move the boundary of scheduled code by one SUnit.
-void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { +void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. if (HazardRec->isEnabled()) { if (!isTop() && SU->isCall) { @@ -1891,7 +2204,7 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { /// Release pending ready nodes into the available queue. This makes them /// visible to heuristics. -void ConvergingScheduler::SchedBoundary::releasePending() { +void GenericScheduler::SchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) MinReadyCycle = UINT_MAX; @@ -1921,7 +2234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { } /// Remove SU from the ready set for this boundary. -void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { +void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { if (Available.isInQueue(SU)) Available.remove(Available.find(SU)); else { @@ -1933,7 +2246,7 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { /// If this queue only has one ready candidate, return it. As a side effect, /// defer any nodes that now hit a hazard, and advance the cycle until at least /// one node is ready. If multiple instructions are ready, return NULL. -SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { +SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); @@ -1962,7 +2275,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { #ifndef NDEBUG // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. -void ConvergingScheduler::SchedBoundary::dumpScheduledState() { +void GenericScheduler::SchedBoundary::dumpScheduledState() { unsigned ResFactor; unsigned ResCount; if (ZoneCritResIdx) { @@ -1985,7 +2298,7 @@ void ConvergingScheduler::SchedBoundary::dumpScheduledState() { } #endif -void ConvergingScheduler::SchedCandidate:: +void GenericScheduler::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!Policy.ReduceResIdx && !Policy.DemandResIdx) @@ -2005,9 +2318,9 @@ initResourceDelta(const ScheduleDAGMI *DAG, /// Return true if this heuristic determines order.
static bool tryLess(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; } @@ -2022,9 +2335,9 @@ static bool tryLess(int TryVal, int CandVal, } static bool tryGreater(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; } @@ -2038,26 +2351,26 @@ static bool tryGreater(int TryVal, int CandVal, return false; } -static bool tryPressure(const PressureElement &TryP, - const PressureElement &CandP, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { +static bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { + int TryRank = TryP.getPSetOrMax(); + int CandRank = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. - if (TryP.PSetID == CandP.PSetID) { - return tryLess(TryP.UnitIncrease, CandP.UnitIncrease, TryCand, Cand, + if (TryRank == CandRank) { + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, Reason); } // If one candidate decreases and the other increases, go with it. - if (tryLess(TryP.UnitIncrease < 0, CandP.UnitIncrease < 0, TryCand, Cand, + // Invalid candidates have UnitInc==0. + if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, Reason)) { return true; } - // If TryP has lower Rank, it has a higher priority. - int TryRank = TryP.PSetRank(); - int CandRank = CandP.PSetRank(); // If the candidates are decreasing pressure, reverse priority. - if (TryP.UnitIncrease < 0) + if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); } @@ -2094,6 +2407,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return 0; } +static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericScheduler::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericScheduler::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericScheduler::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericScheduler::BotPathReduce)) + return true; + } + return false; +} + /// Apply a set of heuristics to a new candidate. Heuristics are currently /// hierarchical.
This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -2105,16 +2444,44 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { /// \param Zone describes the scheduled zone that we are extending. /// \param RPTracker describes reg pressure within the scheduled zone. /// \param TempTracker is a scratch pressure tracker to reuse in queries. -void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, +void GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker) { - // Always initialize TryCand's RPDelta. - TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta, - DAG->getRegionCriticalPSets(), - DAG->getRegPressure().MaxSetPressure); + if (DAG->isTrackingPressure()) { + // Always initialize TryCand's RPDelta. + if (Zone.isTop()) { + TempTracker.getMaxDownwardPressureDelta( + TryCand.SU->getInstr(), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + if (VerifyScheduling) { + TempTracker.getMaxUpwardPressureDelta( + TryCand.SU->getInstr(), + &DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + RPTracker.getUpwardPressureDelta( + TryCand.SU->getInstr(), + DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + } + } + DEBUG(if (TryCand.RPDelta.Excess.isValid()) + dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) + << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); // Initialize the candidate if needed. if (!Cand.isValid()) { @@ -2129,13 +2496,22 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, // Avoid exceeding the target's limit. If signed PSetID is negative, it is // invalid; convert it to INT_MAX to give it lowest priority. - if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, - RegExcess)) + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, + Cand.RPDelta.Excess, + TryCand, Cand, RegExcess)) return; // Avoid increasing the max critical pressure in the scheduled region. - if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical)) + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, + Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical)) + return; + + // For loops that are acyclic path limited, aggressively schedule for latency. + // This can result in very long dependence chains scheduled in sequence, so + // once every cycle (when CurrMOps == 0), switch to normal heuristics. + if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + && tryLatency(TryCand, Cand, Zone)) return; // Keep clustered nodes together to encourage downstream peephole @@ -2157,8 +2533,9 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; } // Avoid increasing the max pressure of the entire region. - if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, - TryCand, Cand, RegMax)) + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, + Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax)) return; // Avoid critical resource consumption and balance the schedule. 
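Returning to the checkAcyclicLatency() heuristic introduced earlier in this patch, the following standalone sketch, not part of the patch, plugs assumed numbers into its formulas to show how a latency-limited loop trips the buffer limit. All five inputs are invented for illustration; in the real pass they come from the TargetSchedModel and SchedRemainder:

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed loop: cyclic critical path 4c, acyclic critical path 40c,
  // 20 scaled micro-ops left to schedule, unit factors, 16-entry buffer.
  unsigned CyclicCritPath = 4, CriticalPath = 40;
  unsigned RemIssueCount = 20;
  unsigned LatencyFactor = 1, MicroOpFactor = 1, MicroOpBufferSize = 16;

  // Scaled number of cycles per loop iteration.
  unsigned IterCount =
      std::max(CyclicCritPath * LatencyFactor, RemIssueCount);    // 20
  // Scaled acyclic critical path.
  unsigned AcyclicCount = CriticalPath * LatencyFactor;           // 40
  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop, rounded up.
  unsigned InFlightCount =
      (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount; // 40
  unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;       // 16

  bool IsAcyclicLatencyLimited = InFlightCount > BufferLimit;     // true
  std::printf("InFlight=%u BufferLim=%u limited=%d\n",
              InFlightCount, BufferLimit, (int)IsAcyclicLatencyLimited);
  return 0;
}

With these assumed numbers the in-flight estimate (40 micro-ops) far exceeds the 16-entry buffer, so the region would be scheduled aggressively for latency.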
@@ -2172,27 +2549,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, TopDepthReduce)) - return; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, TopPathReduce)) - return; - } - else { - if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, BotHeightReduce)) - return; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, BotPathReduce)) - return; - } + // For acyclic path limited loops, latency was already checked above. + if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited + && tryLatency(TryCand, Cand, Zone)) { + return; } // Prefer immediate defs/users of the last scheduled instruction. This is a @@ -2210,8 +2570,8 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } #ifndef NDEBUG -const char *ConvergingScheduler::getReasonStr( - ConvergingScheduler::CandReason Reason) { +const char *GenericScheduler::getReasonStr( + GenericScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; case PhysRegCopy: return "PREG-COPY"; @@ -2232,8 +2592,8 @@ const char *ConvergingScheduler::getReasonStr( llvm_unreachable("Unknown reason!"); } -void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureElement P; +void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; unsigned ResIdx = 0; unsigned Latency = 0; switch (Cand.Reason) { @@ -2269,8 +2629,8 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) - << ":" << P.UnitIncrease << " "; + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; else dbgs() << " "; if (ResIdx) @@ -2285,12 +2645,12 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } #endif -/// Pick the best candidate from the top queue. +/// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. -void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, +void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, const RegPressureTracker &RPTracker, SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; @@ -2315,14 +2675,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, +static void tracePick(const GenericScheduler::SchedCandidate &Cand, bool IsTop) { DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); + << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); } /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { +SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. 
This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { @@ -2377,7 +2737,7 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { } /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. -SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { +SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); @@ -2385,24 +2745,26 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { } SUnit *SU; do { - if (ForceTopDown) { + if (RegionPolicy.OnlyTopDown) { SU = Top.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate TopCand(NoPolicy); pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); SU = TopCand.SU; } IsTopNode = true; } - else if (ForceBottomUp) { + else if (RegionPolicy.OnlyBottomUp) { SU = Bot.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + tracePick(BotCand, false); SU = BotCand.SU; } IsTopNode = false; @@ -2421,7 +2783,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { return SU; } -void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { +void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { MachineBasicBlock::iterator InsertPos = SU->getInstr(); if (!isTop) @@ -2452,7 +2814,7 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling /// them here. See comments in biasPhysRegCopy. -void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { +void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); Top.bumpNode(SU); @@ -2469,25 +2831,23 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. 
DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); - if (EnableLoadCluster) + if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); if (EnableMacroFusion) DAG->addMutation(new MacroFusion(DAG->TII)); return DAG; } static MachineSchedRegistry -ConvergingSchedRegistry("converge", "Standard converging scheduler.", - createConvergingSched); +GenericSchedRegistry("converge", "Standard converging scheduler.", + createGenericSched); //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. @@ -2529,15 +2889,6 @@ struct ILPOrder { /// \brief Schedule based on the ILP metric. class ILPScheduler : public MachineSchedStrategy { - /// In case all subtrees are eventually connected to a common root through - /// data dependence (e.g. reduction), place an upper limit on their size. - /// - /// FIXME: A subtree limit is generally good, but in the situation commented - /// above, where multiple similar subtrees feed a common root, we should - /// only split at a point where the resulting subtrees will be balanced. - /// (a motivating test case must be found). - static const unsigned SubtreeLimit = 16; - ScheduleDAGMI *DAG; ILPOrder Cmp; @@ -2721,7 +3072,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->NumPreds > 10 || Node->NumSuccs > 10); + return (Node->Preds.size() > 10 || Node->Succs.size() > 10); } static bool hasNodeAddressLabel(const SUnit *Node, @@ -2744,7 +3095,11 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { std::string Str; raw_string_ostream SS(Str); - SS << "SU(" << SU->NodeNum << ')'; + const SchedDFSResult *DFS = + static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); + SS << "SU:" << SU->NodeNum; + if (DFS) + SS << " I:" << DFS->getNumInstrs(SU); return SS.str(); } static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index dacdbdd..105d7c2 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -308,12 +308,29 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, // to be sunk then it's probably worth it. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; + if (!MO.isReg() || !MO.isUse()) + continue; unsigned Reg = MO.getReg(); - if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Reg == 0) continue; - if (MRI->hasOneNonDBGUse(Reg)) - return true; + + // We don't move live definitions of physical registers, + // so sinking their uses won't enable any opportunities. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + // If this instruction is the only user of a virtual register, + // check if breaking the edge will enable sinking + // both this instruction and the defining instruction. + if (MRI->hasOneNonDBGUse(Reg)) { + // If the definition resides in the same MBB, + // claim it's likely we can sink these together. + // If the definition resides elsewhere, we aren't + // blocking it from being sunk, so don't break the edge.
+ MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() == MI->getParent()) + return true; + } } return false; @@ -615,9 +632,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); - // If the block has multiple predecessors, this would introduce computation on - // a path that it doesn't already exist. We could split the critical edge, - // but for now we just punt. + // If the block has multiple predecessors, this is a critical edge. + // Decide if we can sink along it or need to break the edge. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index e74bfc8..d61470c 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -213,6 +213,10 @@ namespace { const LiveInterval &LI); void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void report(const char *msg, const MachineFunction *MF, + const LiveRange &LR); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR); void verifyInlineAsm(const MachineInstr *MI); @@ -225,9 +229,10 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); - void verifyLiveIntervalSegment(const LiveInterval&, - LiveInterval::const_iterator); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned); + void verifyLiveRangeSegment(const LiveRange&, + const LiveRange::const_iterator I, unsigned); + void verifyLiveRange(const LiveRange&, unsigned); void verifyStackFrame(); }; @@ -414,23 +419,25 @@ void MachineVerifier::report(const char *msg, void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR) { + report(msg, MBB); + *OS << "- liverange: " << LR << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveRange &LR) { + report(msg, MF); + *OS << "- liverange: " << LR << "\n"; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -768,7 +775,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); *OS << MCID.getNumOperands() << " operands expected, but " - << MI->getNumExplicitOperands() << " given.\n"; + << MI->getNumOperands() << " given.\n"; } // Check the tied operands. 
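The tryLess/tryGreater helpers renamed above implement GenericScheduler's hierarchical comparison: the first heuristic that distinguishes two candidates records its reason and stops the evaluation, so lower-priority checks run only when every higher-priority one ties. A minimal standalone sketch of that idiom, using simplified stand-in types rather than the patch's real SchedCandidate and CandReason:

#include <cstdio>

enum CandReason { NoCand, RegExcess, Latency, NodeOrder };

struct Candidate {
  int Excess;     // pressure increase (lower is better)
  int PathLength; // critical-path contribution (lower is better)
  int Order;      // original node order (lower is better)
  CandReason Reason;
};

// Mirrors tryLess(): returns true as soon as this heuristic decides.
static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                    Candidate &Cand, CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason; // remember the strongest reason Cand won by
    return true;
  }
  return false; // tie: fall through to the next heuristic
}

// Hierarchical tryCandidate(): each heuristic runs only if all
// higher-priority heuristics tied.
static void tryCandidate(Candidate &Cand, Candidate &TryCand) {
  if (tryLess(TryCand.Excess, Cand.Excess, TryCand, Cand, RegExcess))
    return;
  if (tryLess(TryCand.PathLength, Cand.PathLength, TryCand, Cand, Latency))
    return;
  tryLess(TryCand.Order, Cand.Order, TryCand, Cand, NodeOrder);
}

int main() {
  Candidate Best = {2, 7, 0, NoCand};
  Candidate Try = {2, 5, 1, NoCand};
  tryCandidate(Best, Try);
  // Pressure ties (2 == 2), so the latency heuristic decides for Try.
  std::printf("winner=%s reason=%d\n",
              Try.Reason == Latency ? "Try" : "Best", (int)Try.Reason);
  return 0;
}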
@@ -826,7 +833,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isReg() && !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) - report("Explicit operand marked as def", MO, MONum); + report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } @@ -1001,16 +1008,16 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Check the cached regunit intervals. if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { - LiveRangeQuery LRQ(*LI, UseIdx); + if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) { + LiveQueryResult LRQ = LR->Query(UseIdx); if (!LRQ.valueIn()) { - report("No live range at use", MO, MONum); + report("No live segment at use", MO, MONum); *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) - << ' ' << *LI << '\n'; + << ' ' << *LR << '\n'; } if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } } } @@ -1020,9 +1027,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. const LiveInterval &LI = LiveInts->getInterval(Reg); - LiveRangeQuery LRQ(LI, UseIdx); + LiveQueryResult LRQ = LI.Query(UseIdx); if (!LRQ.valueIn()) { - report("No live range at use", MO, MONum); + report("No live segment at use", MO, MONum); *OS << UseIdx << " is not live in " << LI << '\n'; } // Check for extra kill flags. @@ -1071,7 +1078,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); - // Check LiveInts for a live range, but only for virtual registers. + // Check LiveInts for a live segment, but only for virtual registers. if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); @@ -1086,9 +1093,17 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { << DefIdx << " in " << LI << '\n'; } } else { - report("No live range at def", MO, MONum); + report("No live segment at def", MO, MONum); *OS << DefIdx << " is not live in " << LI << '\n'; } + // Check that, if the dead def flag is present, LiveInts agree. + if (MO->isDead()) { + LiveQueryResult LRQ = LI.Query(DefIdx); + if (!LRQ.isDeadDef()) { + report("Live range continues after dead def flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } } else { report("Virtual register has no Live interval", MO, MONum); } @@ -1335,25 +1350,26 @@ void MachineVerifier::verifyLiveIntervals() { // Verify all the cached regunit intervals. 
for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) - if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) - verifyLiveInterval(*LI); + if (const LiveRange *LR = LiveInts->getCachedRegUnit(i)) + verifyLiveRange(*LR, i); } -void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, - VNInfo *VNI) { +void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, + const VNInfo *VNI, + unsigned Reg) { if (VNI->isUnused()) return; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LI); + report("Valno not live at def and not marked unused", MF, LR); *OS << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { - report("Live range at def has different valno", MF, LI); + report("Live segment at def has different valno", MF, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; @@ -1361,15 +1377,15 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LI); + report("Invalid definition index", MF, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; + << " in " << LR << '\n'; return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LI); + report("PHIDef value is not defined at MBB start", MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } @@ -1379,161 +1395,154 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LI); + report("No instruction at def index", MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + if (Reg != 0) { + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MOI->getReg() != Reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), Reg)) + continue; + } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LR << '\n'; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. 
- if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LI); + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. + if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LR); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LI); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } -void -MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, - LiveInterval::const_iterator I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); - - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF, LI); - *OS << *I << " has a bad valno\n"; +void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, + const LiveRange::const_iterator I, + unsigned Reg) { + const LiveRange::Segment &S = *I; + const VNInfo *VNI = S.valno; + assert(VNI && "Live segment has no valno"); + + if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { + report("Foreign valno in live segment", MF, LR); + *OS << S << " has a bad valno\n"; } if (VNI->isUnused()) { - report("Live range valno is marked unused", MF, LI); - *OS << *I << '\n'; + report("Live segment valno is marked unused", MF, LR); + *OS << S << '\n'; } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LI); - *OS << *I << '\n'; + report("Bad start of live segment, no basic block", MF, LR); + *OS << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LI); - *OS << *I << '\n'; + if (S.start != MBBStartIdx && S.start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LR); + *OS << S << '\n'; } const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LI); - *OS << *I << '\n'; + report("Bad end of live segment, no basic block", MF, LR); + *OS << S << '\n'; return; } // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + if (S.end == LiveInts->getMBBEndIdx(EndMBB)) return; // RegUnit intervals are allowed dead phis. 
- if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && - I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + S.start == VNI->def && S.end == VNI->def.getDeadSlot()) return; // The live segment is ending inside EndMBB const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LI); - *OS << *I << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB, LR); + *OS << S << '\n'; return; } // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LI); - *OS << *I << '\n'; + if (S.end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LR); + *OS << S << '\n'; } - if (I->end.isDead()) { + if (S.end.isDead()) { // Segment ends on the dead slot. // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LI); - *OS << *I << '\n'; + if (!SlotIndex::isSameInstr(S.start, S.end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LR); + *OS << S << '\n'; } } // A live segment can only end at an early-clobber slot if it is being // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == LI.end() || (I+1)->start != I->end) { + if (S.end.isEarlyClobber()) { + if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LI); - *OS << *I << '\n'; + "redefined by an EC def in the same instruction", EndMBB, LR); + *OS << S << '\n'; } } // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; - bool hasDeadDef = false; for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) + if (!MOI->isReg() || MOI->getReg() != Reg) continue; if (MOI->readsReg()) hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { + if (!S.end.isDead()) { if (!hasRead) { - report("Instruction ending live range doesn't read the register", MI); - *OS << *I << " in " << LI << '\n'; + report("Instruction ending live segment doesn't read the register", MI); + *OS << S << " in " << LR << '\n'; } } } // Now check all the basic blocks in this live segment. MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { + // Is this live segment the beginning of a non-PHIDef VN? + if (S.start == VNI->def && !VNI->isPHIDef()) { // Not live-in to any blocks. 
if (MBB == EndMBB) return; @@ -1541,9 +1550,9 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, ++MFI; } for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); + assert(LiveInts->isLiveInToMBB(LR, MFI)); // We don't know how to track physregs into a landing pad. - if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + if (!TargetRegisterInfo::isVirtualRegister(Reg) && MFI->isLandingPad()) { if (&*MFI == EndMBB) break; @@ -1559,11 +1568,11 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LI); + report("Register not marked live out of predecessor", *PI, LR); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; @@ -1572,7 +1581,7 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, // Only PHI-defs can take different predecessor values. if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LI); + report("Different value live out of predecessor", *PI, LR); *OS << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() @@ -1585,13 +1594,17 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, } } -void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) - verifyLiveIntervalValue(LI, *I); +void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) { + for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end(); + I != E; ++I) + verifyLiveRangeValue(LR, *I, Reg); + + for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I) + verifyLiveRangeSegment(LR, I, Reg); +} - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) - verifyLiveIntervalSegment(LI, I); +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + verifyLiveRange(LI, LI.reg); // Check the LI only has one connected component. if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index bf23eca..dcd9072 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -313,14 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (IncomingReg) { // Add the region from the beginning of MBB to the copy instruction to // IncomingReg's live interval. - LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg); + LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg); VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); if (!IncomingVNI) IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, LIS->getVNInfoAllocator()); - IncomingLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getRegSlot(), - IncomingVNI)); + IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); } LiveInterval &DestLI = LIS->getInterval(DestReg); @@ -332,14 +332,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // the copy instruction. 
VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); assert(OrigDestVNI && "PHI destination should be live at block entry."); - DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot()); DestLI.createDeadDef(DestCopyIndex.getRegSlot(), LIS->getVNInfoAllocator()); DestLI.removeValNo(OrigDestVNI); } else { // Otherwise, remove the region from the beginning of MBB to the copy // instruction from DestReg's live interval. - DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot()); + DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot()); VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI && "PHI destination should be live at its definition."); DestVNI->def = DestCopyIndex.getRegSlot(); @@ -460,7 +460,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (LIS) { if (NewSrcInstr) { LIS->InsertMachineInstrInMaps(NewSrcInstr); - LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr); } if (!SrcUndef && @@ -511,8 +511,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, "Cannot find kill instruction"); SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); - SrcLI.removeRange(LastUseIndex.getRegSlot(), - LIS->getMBBEndIdx(&opBlock)); + SrcLI.removeSegment(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); } } } diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h index 9ac47fb..48234ae 100644 --- a/lib/CodeGen/PHIEliminationUtils.h +++ b/lib/CodeGen/PHIEliminationUtils.h @@ -1,4 +1,4 @@ -//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=// +//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index c0861c5..f4ffd03 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -58,8 +58,6 @@ OptimizeRegAlloc("optimize-regalloc", cl::Hidden, static cl::opt<cl::boolOrDefault> EnableMachineSched("enable-misched", cl::Hidden, cl::desc("Enable the machine instruction scheduling pass.")); -static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden, - cl::desc("Use strong PHI elimination.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -236,7 +234,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Temporarily disable experimental passes. const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); - if (!ST.enableMachineScheduler()) + if (!ST.useMachineScheduler()) disablePass(&MachineSchedulerID); } @@ -675,24 +673,15 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // preferably fix the scavenger to not depend on them). addPass(&LiveVariablesID); - // Add passes that move from transformed SSA into conventional SSA. This is a - // "copy coalescing" problem. - // - if (!EnableStrongPHIElim) { - // Edge splitting is smarter with machine loop info. - addPass(&MachineLoopInfoID); - addPass(&PHIEliminationID); - } + // Edge splitting is smarter with machine loop info. + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); // Eventually, we want to run LiveIntervals before PHI elimination. 
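[Illustration, not part of the diff] With StrongPHIElimination gone, the surviving pre-RA ordering in addOptimizedRegAlloc is easier to see out of context. A minimal standalone sketch using hypothetical stand-in names, not LLVM's real PassManager API:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for pass identifiers; illustration only.
static void addPass(std::vector<std::string> &PM, const std::string &Name) {
  PM.push_back(Name);
}

int main() {
  const bool EarlyLiveIntervals = false; // mirrors the -early-live-intervals flag
  std::vector<std::string> PM;
  // MachineLoopInfo now unconditionally precedes PHIElimination, since
  // edge splitting is smarter with loop info and the strong-phi path is gone.
  addPass(PM, "MachineLoopInfo");
  addPass(PM, "PHIElimination");
  if (EarlyLiveIntervals)
    addPass(PM, "LiveIntervals");
  addPass(PM, "TwoAddressInstructionPass");
  addPass(PM, "RegisterCoalescer");
  for (const std::string &Name : PM)
    std::cout << Name << '\n';
  return 0;
}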
 if (EarlyLiveIntervals)
   addPass(&LiveIntervalsID);

 addPass(&TwoAddressInstructionPassID);
-
- if (EnableStrongPHIElim)
-   addPass(&StrongPHIEliminationID);
-
 addPass(&RegisterCoalescerID);

 // PreRA instruction scheduling.
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index a7439b5..28f2d2f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -40,20 +40,30 @@
 // If the branch instruction can use flag from "sub", then we can replace
 // "sub" with "subs" and eliminate the "cmp" instruction.
 //
-// - Optimize Bitcast pairs:
-//
-//   v1 = bitcast v0
-//   v2 = bitcast v1
-//      = v2
-//   =>
-//   v1 = bitcast v0
-//      = v0
-//
 // - Optimize Loads:
 //
 //   Loads that can be folded into a later instruction. A load is foldable
 //   if it loads to virtual registers and the virtual register defined has
 //   a single use.
+//
+// - Optimize Copies and Bitcasts:
+//
+//   Rewrite copies and bitcasts to avoid cross register bank copies
+//   when possible.
+//   E.g., consider the following example, where capital and lower
+//   letters denote different register files:
+//   b = copy A <-- cross-bank copy
+//   C = copy b <-- cross-bank copy
+//   =>
+//   b = copy A <-- cross-bank copy
+//   C = copy A <-- same-bank copy
+//
+//   E.g., for bitcast:
+//   b = bitcast A <-- cross-bank copy
+//   C = bitcast b <-- cross-bank copy
+//   =>
+//   b = bitcast A <-- cross-bank copy
+//   C = copy A    <-- same-bank copy
//===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "peephole-opt"
@@ -81,11 +91,11 @@
 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                 cl::desc("Disable the peephole optimizer"));

 STATISTIC(NumReuse, "Number of extension results reused");
-STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
 STATISTIC(NumCmps, "Number of compares eliminated");
 STATISTIC(NumImmFold, "Number of move immediate folded");
 STATISTIC(NumLoadFold, "Number of loads folded");
 STATISTIC(NumSelects, "Number of selects optimized");
+STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");

 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -112,11 +122,11 @@ namespace {
     }

   private:
-    bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
     bool optimizeSelect(MachineInstr *MI);
+    bool optimizeCopyOrBitcast(MachineInstr *MI);

     bool isMoveImmediate(MachineInstr *MI,
                          SmallSet<unsigned, 4> &ImmDefRegs,
                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@@ -298,78 +308,6 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
   return Changed;
 }

-/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
-/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
-/// a value cross register classes), and the source is defined by another
-/// bitcast instruction B. And if the register class of source of B matches
-/// the register class of instruction A, then it is legal to replace all uses
-/// of the def of A with source of B. e.g.
-///   %vreg0<def> = VMOVSR %vreg1
-///   %vreg3<def> = VMOVRS %vreg0
-/// Replace all uses of vreg3 with vreg1.
-
-bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
-                                             MachineBasicBlock *MBB) {
-  unsigned NumDefs = MI->getDesc().getNumDefs();
-  unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
-  if (NumDefs != 1)
-    return false;
-
-  unsigned Def = 0;
-  unsigned Src = 0;
-  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (!MO.isReg())
-      continue;
-    unsigned Reg = MO.getReg();
-    if (!Reg)
-      continue;
-    if (MO.isDef())
-      Def = Reg;
-    else if (Src)
-      // Multiple sources?
-      return false;
-    else
-      Src = Reg;
-  }
-
-  assert(Def && Src && "Malformed bitcast instruction!");
-
-  MachineInstr *DefMI = MRI->getVRegDef(Src);
-  if (!DefMI || !DefMI->isBitcast())
-    return false;
-
-  unsigned SrcSrc = 0;
-  NumDefs = DefMI->getDesc().getNumDefs();
-  NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
-  if (NumDefs != 1)
-    return false;
-  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
-    const MachineOperand &MO = DefMI->getOperand(i);
-    if (!MO.isReg() || MO.isDef())
-      continue;
-    unsigned Reg = MO.getReg();
-    if (!Reg)
-      continue;
-    if (!MO.isDef()) {
-      if (SrcSrc)
-        // Multiple sources?
-        return false;
-      else
-        SrcSrc = Reg;
-    }
-  }
-
-  if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
-    return false;
-
-  MRI->replaceRegWith(Def, SrcSrc);
-  MRI->clearKillFlags(SrcSrc);
-  MI->eraseFromParent();
-  ++NumBitcasts;
-  return true;
-}
-
 /// optimizeCmpInstr - If the instruction is a compare and the previous
 /// instruction it's comparing against already sets (or could be modified to
 /// set) the same flag as the compare, then we can remove the comparison and use
 /// the flag from the previous instruction.
@@ -411,6 +349,150 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
   return true;
 }

+/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+                                  const TargetRegisterClass *DefRC,
+                                  unsigned DefSubReg,
+                                  const TargetRegisterClass *SrcRC,
+                                  unsigned SrcSubReg) {
+  // Same register class.
+  if (DefRC == SrcRC)
+    return true;
+
+  // Both operands are sub registers. Check if they share a register class.
+  unsigned SrcIdx, DefIdx;
+  if (SrcSubReg && DefSubReg)
+    return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+                                      SrcIdx, DefIdx) != NULL;
+  // At most one of the registers is a sub register; make it Src to avoid
+  // duplicating the test.
+  if (!SrcSubReg) {
+    std::swap(DefSubReg, SrcSubReg);
+    std::swap(DefRC, SrcRC);
+  }
+
+  // One of the registers is a sub register; check if we can get a superclass.
+  if (SrcSubReg)
+    return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL;
+  // Plain copy.
+  return TRI.getCommonSubClass(DefRC, SrcRC) != NULL;
+}
+
+/// \brief Get the index of the definition and source for \p Copy
+/// instruction.
+/// \pre Copy.isCopy() or Copy.isBitcast().
+/// \return True if the Copy instruction has only one register source
+/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
+/// are invalid.
+static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
+                                      unsigned &DefIdx, unsigned &SrcIdx) {
+  assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
+  if (Copy.isCopy()) {
+    // Copy instructions are supposed to be: Def = Src.
+    if (Copy.getDesc().getNumOperands() != 2)
+      return false;
+    DefIdx = 0;
+    SrcIdx = 1;
+    assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
+    return true;
+  }
+  // Bitcast case.
+  // Bitcasts with more than one def are not supported.
+  if (Copy.getDesc().getNumDefs() != 1)
+    return false;
+  // Initialize SrcIdx to an undefined operand.
+  SrcIdx = Copy.getDesc().getNumOperands();
+  for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
+    const MachineOperand &MO = Copy.getOperand(OpIdx);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    if (MO.isDef())
+      DefIdx = OpIdx;
+    else if (SrcIdx != EndOpIdx)
+      // Multiple sources?
+      return false;
+    SrcIdx = OpIdx;
+  }
+  return true;
+}
+
+/// \brief Optimize a copy or bitcast instruction to avoid cross
+/// register bank copy. The optimization looks through a chain of
+/// copies and tries to find a source that has a compatible register
+/// class.
+/// Two register classes are considered to be compatible if they share
+/// the same register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some when
+/// possible.
+/// \pre \p MI is a Copy (MI->isCopy() is true)
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
+  unsigned DefIdx, SrcIdx;
+  if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
+    return false;
+
+  const MachineOperand &MODef = MI->getOperand(DefIdx);
+  assert(MODef.isReg() && "Copies must be between registers.");
+  unsigned Def = MODef.getReg();
+
+  if (TargetRegisterInfo::isPhysicalRegister(Def))
+    return false;
+
+  const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
+  unsigned DefSubReg = MODef.getSubReg();
+
+  unsigned Src;
+  unsigned SrcSubReg;
+  bool ShouldRewrite = false;
+  MachineInstr *Copy = MI;
+  const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
+
+  // Follow the chain of copies until we reach the top or find a
+  // more suitable source.
+  do {
+    unsigned CopyDefIdx, CopySrcIdx;
+    if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx))
+      break;
+    const MachineOperand &MO = Copy->getOperand(CopySrcIdx);
+    assert(MO.isReg() && "Copies must be between registers.");
+    Src = MO.getReg();
+
+    if (TargetRegisterInfo::isPhysicalRegister(Src))
+      break;
+
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
+    SrcSubReg = MO.getSubReg();
+
+    // If this source does not incur a cross register bank copy, use it.
+    ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
+                                          SrcSubReg);
+    // Follow the chain of copies: get the definition of Src.
+    Copy = MRI->getVRegDef(Src);
+  } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast()));
+
+  // If we did not find a more suitable source, there is nothing to optimize.
+  if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
+    return false;
+
+  // Rewrite the copy to avoid a cross register bank penalty.
+  unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
+    MRI->createVirtualRegister(DefRC);
+  MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                  TII->get(TargetOpcode::COPY), NewVR)
+    .addReg(Src, 0, SrcSubReg);
+  NewCopy->getOperand(0).setSubReg(DefSubReg);
+
+  MRI->replaceRegWith(Def, NewVR);
+  MRI->clearKillFlags(NewVR);
+  MI->eraseFromParent();
+  ++NumCopiesBitcasts;
+  return true;
+}
+
 /// isLoadFoldable - Check whether MI is a candidate for folding into a later
 /// instruction. We only fold loads to virtual registers and the virtual
 /// register defined has a single use.
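[Illustration, not part of the diff] The rewrite optimizeCopyOrBitcast performs is easiest to see on the toy chain from the new header comment (b = copy A; C = copy b). A minimal sketch; banks as chars and copies as a map are hypothetical simplifications, not the MachineInstr/MachineRegisterInfo API:

#include <iostream>
#include <map>

// Toy model: every vreg lives in a register bank ('A' or 'B'); a copy is
// cross-bank when source and destination banks differ.
struct ToyCopy { int Dst, Src; };

int main() {
  std::map<int, char> Bank = {{0, 'A'}, {1, 'B'}, {2, 'A'}};
  std::map<int, ToyCopy> DefOf = {{1, {1, 0}}, {2, {2, 1}}}; // v1 = copy v0; v2 = copy v1
  // Optimizing "v2 = copy v1": walk up the copy chain until a source in
  // the same bank as v2 is found, or the chain runs out.
  int Dst = 2;
  int Src = DefOf[Dst].Src;
  while (Bank[Src] != Bank[Dst] && DefOf.count(Src))
    Src = DefOf[Src].Src; // look through one more copy
  if (Bank[Src] == Bank[Dst])
    DefOf[Dst].Src = Src; // rewrite to the same-bank source; v1 is kept as-is
  std::cout << "v2 = copy v" << DefOf[Dst].Src << '\n'; // prints: v2 = copy v0
  return 0;
}

Note how this mirrors the commit's policy: the original cross-bank copy is never deleted, only the consumer is redirected.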
@@ -523,7 +605,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->mayStore() || MI->isCall()) FoldAsLoadDefReg = 0; - if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || (MI->isSelect() && optimizeSelect(MI))) { // MI is deleted. diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 27f5676..1afc1ec 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -127,6 +127,12 @@ namespace { /// The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; + /// The index in BB of RegionEnd. + /// + /// This is the instruction number from the top of the current block, not + /// the SlotIndex. It is only used by the AntiDepBreaker. + unsigned EndIndex; + public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, @@ -141,11 +147,14 @@ namespace { /// void startBlock(MachineBasicBlock *BB); + // Set the index of RegionEnd within the current BB. + void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } + /// Initialize the scheduler state for the next scheduling region. virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount); + unsigned regioninstrs); /// Notify that the scheduler has finished scheduling the current region. virtual void exitRegion(); @@ -197,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList( TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), - LiveRegs(TRI->getNumRegs()) + LiveRegs(TRI->getNumRegs()), EndIndex(0) { const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); @@ -223,8 +232,8 @@ SchedulePostRATDList::~SchedulePostRATDList() { void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + unsigned regioninstrs) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); Sequence.clear(); } @@ -312,20 +321,21 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { MachineInstr *MI = llvm::prior(I); + --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. 
if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count); + Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; - CurrentCount = Count - 1; + CurrentCount = Count; Scheduler.Observe(MI, CurrentCount); } I = MI; - --Count; if (MI->isBundle()) Count -= MI->getBundleSize(); } @@ -333,6 +343,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); @@ -504,11 +515,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // Examine all used registers and set/clear kill flag. When a // register is used multiple times we only set the kill flag on - // the first use. + // the first use. Don't set kill flags on undef operands. killedRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) continue; + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index e4e18c3..0c5173a 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -78,7 +78,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // For virtual regiusters, mark all uses as <undef>, and convert users to + // For virtual registers, mark all uses as <undef>, and convert users to // implicit-def when possible. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 1965188..b0e494f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -14,9 +14,6 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass provides an optional shrink wrapping variant of prolog/epilog -// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pei" @@ -66,6 +63,38 @@ STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool PEI::isReturnBlock(MachineBasicBlock* MBB) { + return (MBB && !MBB->empty() && MBB->back().isReturn()); +} + +/// Compute the set of return blocks +void PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector<CalleeSavedInfo> &CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) + return; + + // Save refs to entry and return blocks. 
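[Illustration, not part of the diff] The calculateSets body that follows reduces to a one-pass scan: remember the entry block, collect every block that ends in a return. In miniature, with ToyBlock as a hypothetical stand-in for MachineBasicBlock:

#include <iostream>
#include <string>
#include <vector>

struct ToyBlock {
  std::string Name;
  bool EndsInReturn; // stands in for !MBB->empty() && MBB->back().isReturn()
};

int main() {
  std::vector<ToyBlock> Fn = {{"entry", false}, {"body", false}, {"exit", true}};
  const ToyBlock *EntryBlock = &Fn.front();
  std::vector<const ToyBlock *> ReturnBlocks;
  for (const ToyBlock &MBB : Fn)
    if (MBB.EndsInReturn) // isReturnBlock, in miniature
      ReturnBlocks.push_back(&MBB);
  std::cout << "spills -> " << EntryBlock->Name << ", restores -> "
            << ReturnBlocks.size() << " return block(s)\n";
  return 0;
}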
+ EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + return; +} + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -93,16 +122,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: - // - With shrink wrapping, place spills and restores to tightly - // enclose regions in the Machine CFG of the function where - // they are used. - // - Without shink wrapping (default), place all spills in the - // entry block, all restores in return blocks. - placeCSRSpillsAndRestores(Fn); + // place all spills in the entry block, all restores in return blocks. + calculateSets(Fn); // Add the code to save and restore the callee saved registers - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -117,8 +141,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -143,7 +166,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { << ") in " << Fn.getName() << ".\n"; delete RS; - clearAllSets(); + ReturnBlocks.clear(); return true; } @@ -221,8 +244,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { return; // In Naked functions we aren't going to save any registers. - if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (F.getFunction()->hasFnAttribute(Attribute::Naked)) return; std::vector<CalleeSavedInfo> CSI; @@ -286,7 +308,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { } /// insertCSRSpillsAndRestores - Insert spill and restore code for -/// callee saved registers used in the function, handling shrink wrapping. +/// callee saved registers used in the function. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. @@ -304,133 +326,33 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; - if (!ShrinkWrapThisFunction) { - // Spill using target interface. - I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - - // Insert the spill to the stack frame. - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, - CSI[i].getFrameIdx(), RC, TRI); - } - } - - // Restore using target interface. 
- for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock* MBB = ReturnBlocks[ri]; - I = MBB->end(); --I; - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - - bool AtStart = I == MBB->begin(); - MachineBasicBlock::iterator BeforeI = I; - if (!AtStart) - --BeforeI; - - // Restore all registers immediately before the return and any - // terminators that precede it. - if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, - CSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; - } - } - } - } - return; - } - - // Insert spills. - std::vector<CalleeSavedInfo> blockCSI; - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet save = BI->second; - - if (save.empty()) - continue; - - blockCSI.clear(); - for (CSRegSet::iterator RI = save.begin(), - RE = save.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not collect callee saved register info"); - - I = MBB->begin(); - - // When shrink wrapping, use stack slot stores/loads. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Spill using target interface. + I = EntryBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. - MBB->addLiveIn(blockCSI[i].getReg()); + EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - unsigned Reg = blockCSI[i].getReg(); + unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*MBB, I, Reg, - true, - blockCSI[i].getFrameIdx(), + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restore = BI->second; - - if (restore.empty()) - continue; + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock *MBB = ReturnBlocks[ri]; + I = MBB->end(); + --I; - blockCSI.clear(); - for (CSRegSet::iterator RI = restore.begin(), - RE = restore.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not find callee saved register info"); - - // If MBB is empty and needs restores, insert at the _beginning_. - if (MBB->empty()) { - I = MBB->begin(); - } else { - I = MBB->end(); - --I; - - // Skip over all terminator instructions, which are part of the - // return sequence. - if (! I->isTerminator()) { - ++I; - } else { - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - } - } + // Skip over all terminator instructions, which are part of the return + // sequence. 
+ MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; @@ -439,21 +361,21 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that precede it. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - unsigned Reg = blockCSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, - blockCSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } } } } diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 50f4daf..77cfa2b 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -14,9 +14,6 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass also implements a shrink wrapping variant of prolog/epilog -// insertion. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_PEI_H @@ -54,74 +51,16 @@ namespace llvm { // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Analysis info for spill/restore placement. - // "CSR": "callee saved register". - - // CSRegSet contains indices into the Callee Saved Register Info - // vector built by calculateCalleeSavedRegisters() and accessed - // via MF.getFrameInfo()->getCalleeSavedInfo(). - typedef SparseBitVector<> CSRegSet; - - // CSRegBlockMap maps MachineBasicBlocks to sets of callee - // saved register indices. - typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap; - - // Set and maps for computing CSR spill/restore placement: - // used in function (UsedCSRegs) - // used in a basic block (CSRUsed) - // anticipatable in a basic block (Antic{In,Out}) - // available in a basic block (Avail{In,Out}) - // to be spilled at the entry to a basic block (CSRSave) - // to be restored at the end of a basic block (CSRRestore) - CSRegSet UsedCSRegs; - CSRegBlockMap CSRUsed; - CSRegBlockMap AnticIn, AnticOut; - CSRegBlockMap AvailIn, AvailOut; - CSRegBlockMap CSRSave; - CSRegBlockMap CSRRestore; - // Entry and return blocks of the current function. MachineBasicBlock* EntryBlock; SmallVector<MachineBasicBlock*, 4> ReturnBlocks; - // Map of MBBs to top level MachineLoops. - DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops; - - // Flag to control shrink wrapping per-function: - // may choose to skip shrink wrapping for certain - // functions. - bool ShrinkWrapThisFunction; - // Flag to control whether to use the register scavenger to resolve // frame index materialization registers. 
Set according to
    // TRI->requiresFrameIndexScavenging() for the current function.
    bool FrameIndexVirtualScavenging;

-#ifndef NDEBUG
-    // Machine function handle.
-    MachineFunction* MF;
-
-    // Flag indicating that the current function
-    // has at least one "short" path in the machine
-    // CFG from the entry block to an exit block.
-    bool HasFastExitPath;
-#endif
-
-    bool calculateSets(MachineFunction &Fn);
-    bool calcAnticInOut(MachineBasicBlock* MBB);
-    bool calcAvailInOut(MachineBasicBlock* MBB);
-    void calculateAnticAvail(MachineFunction &Fn);
-    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
-                              SmallVectorImpl<MachineBasicBlock *> &blks);
-    bool addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks);
-    bool calcSpillPlacements(MachineBasicBlock* MBB,
-                             SmallVectorImpl<MachineBasicBlock *> &blks,
-                             CSRegBlockMap &prevSpills);
-    bool calcRestorePlacements(MachineBasicBlock* MBB,
-                               SmallVectorImpl<MachineBasicBlock *> &blks,
-                               CSRegBlockMap &prevRestores);
-    void placeSpillsAndRestores(MachineFunction &Fn);
-    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateSets(MachineFunction &Fn);
     void calculateCallsInformation(MachineFunction &Fn);
     void calculateCalleeSavedRegisters(MachineFunction &Fn);
     void insertCSRSpillsAndRestores(MachineFunction &Fn);
@@ -132,44 +71,8 @@ namespace llvm {
     void scavengeFrameVirtualRegs(MachineFunction &Fn);
     void insertPrologEpilogCode(MachineFunction &Fn);

-    // Initialize DFA sets, called before iterations.
-    void clearAnticAvailSets();
-    // Clear all sets constructed by shrink wrapping.
-    void clearAllSets();
-
-    // Initialize all shrink wrapping data.
-    void initShrinkWrappingInfo();
-
-    // Convienences for dealing with machine loops.
-    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
-    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
-
-    // Propgate CSRs used in MBB to all MBBs of loop LP.
-    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
-
     // Convenience for recognizing return blocks.
     bool isReturnBlock(MachineBasicBlock* MBB);
-
-#ifndef NDEBUG
-    // Debugging methods.
-
-    // Mark this function as having fast exit paths.
-    void findFastExitPath();
-
-    // Verify placement of spills/restores.
-    void verifySpillRestorePlacement();
-
-    std::string getBasicBlockName(const MachineBasicBlock* MBB);
-    std::string stringifyCSRegSet(const CSRegSet& s);
-    void dumpSet(const CSRegSet& s);
-    void dumpUsed(MachineBasicBlock* MBB);
-    void dumpAllUsed();
-    void dumpSets(MachineBasicBlock* MBB);
-    void dumpSets1(MachineBasicBlock* MBB);
-    void dumpAllSets();
-    void dumpSRSets();
-#endif
-
   };
 } // End llvm namespace
 #endif
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index df3e12a..293e306 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -50,6 +50,9 @@ bool RegAllocBase::VerifyEnabled = false;
 // RegAllocBase Implementation
 //===----------------------------------------------------------------------===//

+// Pin the vtable to this file.
+void RegAllocBase::anchor() {}
+
 void RegAllocBase::init(VirtRegMap &vrm,
                         LiveIntervals &lis,
                         LiveRegMatrix &mat) {
@@ -99,14 +102,13 @@ void RegAllocBase::allocatePhysRegs() {
     // result from splitting.
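[Illustration, not part of the diff] The allocatePhysRegs loop this hunk sits in is a plain worklist. A minimal sketch with a made-up selectOrSplit, showing why the commit's switch from LiveInterval* to plain unsigned vreg IDs in SplitVRegs changes no control flow:

#include <iostream>
#include <queue>
#include <vector>

// Made-up stand-in: even IDs "get a physreg", odd IDs "split" into ID+1.
static unsigned selectOrSplit(unsigned VirtReg, std::vector<unsigned> &SplitVRegs) {
  if (VirtReg % 2 == 0)
    return 100 + VirtReg; // pretend this is an assigned physreg
  SplitVRegs.push_back(VirtReg + 1);
  return 0; // no assignment yet; split products carry the work forward
}

int main() {
  std::queue<unsigned> Work;
  Work.push(1);
  Work.push(2);
  while (!Work.empty()) {
    unsigned VirtReg = Work.front();
    Work.pop();
    std::vector<unsigned> SplitVRegs;
    if (unsigned PhysReg = selectOrSplit(VirtReg, SplitVRegs))
      std::cout << "vreg " << VirtReg << " -> physreg " << PhysReg << '\n';
    for (unsigned R : SplitVRegs)
      Work.push(R); // split/spill products go back on the queue
  }
  return 0;
}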
DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n'); - typedef SmallVector<LiveInterval*, 4> VirtRegVec; + << ':' << *VirtReg << '\n'); + typedef SmallVector<unsigned, 4> VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! - const char *Msg = "ran out of registers during register allocation"; // Probably caused by an inline asm. MachineInstr *MI; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); @@ -114,9 +116,9 @@ void RegAllocBase::allocatePhysRegs() { if (MI->isInlineAsm()) break; if (MI) - MI->emitError(Msg); + MI->emitError("inline assembly requires more registers than available"); else - report_fatal_error(Msg); + report_fatal_error("ran out of registers during register allocation"); // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg, RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); @@ -128,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() { for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { - LiveInterval *SplitVirtReg = *I; + LiveInterval *SplitVirtReg = &LIS->getInterval(*I); assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index ccaabba..c17a8d9 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -38,7 +38,7 @@ #define LLVM_CODEGEN_REGALLOCBASE #include "llvm/ADT/OwningPtr.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" namespace llvm { @@ -57,6 +57,7 @@ class Spiller; /// live range splitting. They must also override enqueue/dequeue to provide an /// assignment order. class RegAllocBase { + virtual void anchor(); protected: const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; @@ -90,7 +91,7 @@ protected: // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; + SmallVectorImpl<unsigned> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index d6a7d6f..6768e45 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -102,7 +102,7 @@ public: } virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); + SmallVectorImpl<unsigned> &SplitVRegs); /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); @@ -111,7 +111,7 @@ public: // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. 
bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); + SmallVectorImpl<unsigned> &SplitVRegs); static char ID; }; @@ -126,7 +126,6 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -143,7 +142,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveDebugVariables>(); AU.addPreserved<LiveDebugVariables>(); - AU.addRequired<CalculateSpillWeights>(); AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); AU.addRequired<MachineBlockFrequencyInfo>(); @@ -168,7 +166,7 @@ void RABasic::releaseMemory() { // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { + SmallVectorImpl<unsigned> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. SmallVector<LiveInterval*, 8> Intfs; @@ -222,7 +220,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // minimal, there is no value in caching them outside the scope of // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { + SmallVectorImpl<unsigned> &SplitVRegs) { // Populate a list of physical register spill candidates. SmallVector<unsigned, 8> PhysRegSpillCands; @@ -279,6 +277,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); + + calculateSpillWeightsAndHints(*LIS, *MF, + getAnalysis<MachineLoopInfo>(), + getAnalysis<MachineBlockFrequencyInfo>()); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 6617e50..e92dbd2 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -144,7 +144,7 @@ namespace { // not be erased. bool isBulkSpilling; - enum { + enum LLVM_ENUM_INT_TYPE(unsigned) { spillClean = 1, spillDirty = 100, spillImpossible = ~0u @@ -298,7 +298,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); - bool IsIndirect = DBG->getOperand(1).isImm(); // Register-indirect value? + bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; DebugLoc DL; if (MI == MBB->end()) { @@ -569,7 +569,10 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } // Nothing we can do. Report an error and keep going with a bad allocation. 
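[Illustration, not part of the diff] Both allocators now special-case inline asm when registers run out, instead of one generic message. The shared pattern, with a hypothetical ToyInstr standing in for MachineInstr and its diagnostics hook:

#include <iostream>
#include <string>

struct ToyInstr {
  bool IsInlineAsm; // stands in for MI->isInlineAsm()
  void emitError(const std::string &Msg) const {
    std::cerr << "error: " << Msg << '\n';
  }
};

int main() {
  ToyInstr MI{true};
  // Mirrors the new error path: blame inline asm specifically when the
  // instruction demanding the impossible allocation is an INLINEASM.
  if (MI.IsInlineAsm)
    MI.emitError("inline assembly requires more registers than available");
  else
    MI.emitError("ran out of registers during register allocation");
  return 0;
}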
- MI->emitError("ran out of registers during register allocation"); + if (MI->isInlineAsm()) + MI->emitError("inline assembly requires more registers than available"); + else + MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); return assignVirtToPhysReg(VirtReg, *AO.begin()); } @@ -856,7 +859,7 @@ void RAFast::AllocateBasicBlock() { } else { // Modify DBG_VALUE now that the value is in a spill slot. - bool IsIndirect = MI->getOperand(1).isImm(); + bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(MI->getNumOperands()-1).getMetadata(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index f9e363b..c08d955 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -120,7 +120,9 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; +#ifndef NDEBUG static const char *const StageName[]; +#endif // RegInfo - Keep additional information about each live range. struct RegInfo { @@ -147,7 +149,7 @@ class RAGreedy : public MachineFunctionPass, void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { - unsigned Reg = (*Begin)->reg; + unsigned Reg = *Begin; if (ExtraRegInfo[Reg].Stage == RS_New) ExtraRegInfo[Reg].Stage = NewStage; } @@ -220,7 +222,7 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector<GlobalSplitCandidate, 32> GlobalCand; - enum { NoCand = ~0u }; + enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. @@ -241,7 +243,7 @@ public: virtual void enqueue(LiveInterval *LI); virtual LiveInterval *dequeue(); virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<unsigned>&); /// Perform register allocation. 
virtual bool runOnMachineFunction(MachineFunction &mf);
@@ -265,22 +267,22 @@ private:
   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
   void evictInterference(LiveInterval&, unsigned,
-                         SmallVectorImpl<LiveInterval*>&);
+                         SmallVectorImpl<unsigned>&);

   unsigned tryAssign(LiveInterval&, AllocationOrder&,
-                     SmallVectorImpl<LiveInterval*>&);
+                     SmallVectorImpl<unsigned>&);
   unsigned tryEvict(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&, unsigned = ~0u);
+                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
-                          SmallVectorImpl<LiveInterval*>&);
+                          SmallVectorImpl<unsigned>&);
   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
-                         SmallVectorImpl<LiveInterval*>&);
+                         SmallVectorImpl<unsigned>&);
   unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
-                               SmallVectorImpl<LiveInterval*>&);
+                               SmallVectorImpl<unsigned>&);
   unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
-                         SmallVectorImpl<LiveInterval*>&);
+                         SmallVectorImpl<unsigned>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
-                    SmallVectorImpl<LiveInterval*>&);
+                    SmallVectorImpl<unsigned>&);
 };
} // end anonymous namespace
@@ -313,7 +315,6 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
-  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -337,7 +338,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<LiveDebugVariables>();
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
-  AU.addRequired<CalculateSpillWeights>();
   AU.addRequired<MachineDominatorTree>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<MachineLoopInfo>();
@@ -455,7 +455,7 @@ LiveInterval *RAGreedy::dequeue() {

 /// tryAssign - Try to assign VirtReg to an available register.
 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order,
-                             SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                             SmallVectorImpl<unsigned> &NewVRegs) {
   Order.rewind();
   unsigned PhysReg;
   while ((PhysReg = Order.next()))
@@ -638,7 +638,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 /// from being assigned to Physreg. This assumes that canEvictInterference
 /// returned true.
 void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
-                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+                                 SmallVectorImpl<unsigned> &NewVRegs) {
   // Make sure that VirtReg has a cascade number, and assign that cascade
   // number to every evicted register. These live ranges can then only be
   // evicted by a newer cascade, preventing infinite loops.
@@ -670,7 +670,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
            "Cannot decrease cascade number, illegal eviction");
     ExtraRegInfo[Intf->reg].Cascade = Cascade;
     ++NumEvicted;
-    NewVRegs.push_back(Intf);
+    NewVRegs.push_back(Intf->reg);
   }
 }

@@ -680,7 +680,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
 /// @return Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs, + SmallVectorImpl<unsigned> &NewVRegs, unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); @@ -1125,7 +1125,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(Reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); @@ -1136,7 +1136,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &Reg = *LREdit.get(i); + LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); // Ignore old intervals from DCE. if (getStage(Reg) != RS_New) @@ -1170,7 +1170,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { unsigned NumCands = 0; unsigned BestCand = NoCand; BlockFrequency BestCost; @@ -1305,7 +1305,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// creates a lot of local live ranges, that will be split by tryLocalSplit if /// they don't allocate. unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); @@ -1326,14 +1326,14 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->finish(&IntvMap); // Tell LiveDebugVariables about the new ranges. - DebugVars->splitRegister(Reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Sort out the new intervals created by splitting. The remainder interval // goes straight to spilling, the new local ranges get to stay RS_New. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &LI = *LREdit.get(i); + LiveInterval &LI = LIS->getInterval(LREdit.get(i)); if (getStage(LI) == RS_New && IntvMap[i] == 0) setStage(LI, RS_Spill); } @@ -1357,7 +1357,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// This is similar to spilling to a larger register class. unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) return 0; @@ -1393,7 +1393,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Assign all new registers to RS_Spill. This was the last chance. @@ -1464,9 +1464,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // Add fixed interference. 
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - const LiveInterval &LI = LIS->getRegUnit(*Units); - LiveInterval::const_iterator I = LI.find(StartIdx); - LiveInterval::const_iterator E = LI.end(); + const LiveRange &LR = LIS->getRegUnit(*Units); + LiveRange::const_iterator I = LR.find(StartIdx); + LiveRange::const_iterator E = LR.end(); // Same loop as above. Mark any overlapped gaps as HUGE_VALF. for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { @@ -1477,7 +1477,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; for (; Gap != NumGaps; ++Gap) { - GapWeight[Gap] = HUGE_VALF; + GapWeight[Gap] = llvm::huge_valf; if (Uses[Gap+1].getBaseIndex() >= I->end) break; } @@ -1491,7 +1491,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, /// basic block. /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); @@ -1583,7 +1583,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = HUGE_VALF; + GapWeight[RegMaskGaps[i]] = llvm::huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1618,7 +1618,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Legally, without causing looping? bool Legal = !ProgressRequired || NewGaps < NumGaps; - if (Legal && MaxGap < HUGE_VALF) { + if (Legal && MaxGap < llvm::huge_valf) { // Estimate the new spill weight. Each instruction reads or writes the // register. Conservatively assume there are no read-modify-write // instructions. @@ -1685,7 +1685,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->useIntv(SegStart, SegStop); SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. Otherwise, @@ -1698,8 +1698,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - setStage(*LREdit.get(i), RS_Split2); - DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); + DEBUG(dbgs() << PrintReg(LREdit.get(i))); } DEBUG(dbgs() << '\n'); } @@ -1716,7 +1716,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// assignable. /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*>&NewVRegs) { + SmallVectorImpl<unsigned>&NewVRegs) { // Ranges must be Split2 or less. 
if (getStage(VirtReg) >= RS_Spill) return 0; @@ -1765,7 +1765,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<LiveInterval*> &NewVRegs) { + SmallVectorImpl<unsigned> &NewVRegs) { // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1790,7 +1790,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, if (Stage < RS_Split) { setStage(VirtReg, RS_Split); DEBUG(dbgs() << "wait for second round\n"); - NewVRegs.push_back(&VirtReg); + NewVRegs.push_back(VirtReg.reg); return 0; } @@ -1838,6 +1838,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 81ecca1..88c8201 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -95,7 +95,6 @@ public: : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } @@ -158,13 +157,13 @@ char RegAllocPBQP::ID = 0; } // End anonymous namespace. -unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { +unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const { Node2VReg::const_iterator vregItr = node2VReg.find(node); assert(vregItr != node2VReg.end() && "No vreg for node."); return vregItr->second; } -PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { +PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; @@ -247,7 +246,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, } // Construct the node. - PBQP::Graph::NodeItr node = + PBQP::Graph::NodeId node = g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); // Record the mapping and allowed set in the problem. @@ -273,7 +272,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, assert(!l2.empty() && "Empty interval in vreg set?"); if (l1.overlaps(l2)) { - PBQP::Graph::EdgeItr edge = + PBQP::Graph::EdgeId edge = g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); @@ -364,16 +363,16 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, } if (pregOpt < allowed.size()) { ++pregOpt; // +1 to account for spill option. 
- PBQP::Graph::NodeItr node = p->getNodeForVReg(src); + PBQP::Graph::NodeId node = p->getNodeForVReg(src); addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); } } else { const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); - PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); - PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); - if (edge == g.edgesEnd()) { + PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst); + PBQP::Graph::NodeId node2 = p->getNodeForVReg(src); + PBQP::Graph::EdgeId edge = g.findEdge(node1, node2); + if (edge == g.invalidEdgeId()) { edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, allowed2->size() + 1, 0)); @@ -432,7 +431,6 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); - au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); au.addRequired<MachineBlockFrequencyInfo>(); @@ -477,11 +475,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Graph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. - for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), - nodeEnd = g.nodesEnd(); - node != nodeEnd; ++node) { - unsigned vreg = problem.getVRegForNode(node); - unsigned alloc = solution.getSelection(node); + for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(), + nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + unsigned vreg = problem.getVRegForNode(*nodeItr); + unsigned alloc = solution.getSelection(*nodeItr); if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); @@ -491,7 +489,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - SmallVector<LiveInterval*, 8> newSpills; + SmallVector<unsigned, 8> newSpills; LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); @@ -502,9 +500,10 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, // allocate. 
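For orientation, the PBQP spill path in this hunk reduces to the following flow, sketched with the function's own locals (the iteration over the new vregs appears in full just below):

  vregsToAlloc.erase(vreg);              // the spilled vreg itself is finished
  SmallVector<unsigned, 8> newSpills;    // receives the vregs created by spilling
  LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
  spiller->spill(LRE);
  // Each vreg recorded in LRE is then re-queued in vregsToAlloc for another round.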
for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { - assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); - vregsToAlloc.insert((*itr)->reg); + LiveInterval &li = lis->getInterval(*itr); + assert(!li.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(li.reg, tri) << " "); + vregsToAlloc.insert(li.reg); } DEBUG(dbgs() << ")\n"); @@ -550,6 +549,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lss = &getAnalysis<LiveStacks>(); mbfi = &getAnalysis<MachineBlockFrequencyInfo>(); + calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(), + *mbfi); + vrm = &getAnalysis<VirtRegMap>(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index f99f1a3..dd86c1f 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -398,7 +398,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { } void RegisterCoalescer::eliminateDeadDefs() { - SmallVector<LiveInterval*, 8> NewRegs; + SmallVector<unsigned, 8> NewRegs; LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); } @@ -434,11 +434,11 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); - // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; + LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); + if (BS == IntB.end()) return false; + VNInfo *BValNo = BS->valno; // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we @@ -447,10 +447,10 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - // The live range might not exist after fun with physreg coalescing. - if (ALR == IntA.end()) return false; - VNInfo *AValNo = ALR->valno; + LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); + // The live segment might not exist after fun with physreg coalescing. + if (AS == IntA.end()) return false; + VNInfo *AValNo = AS->valno; // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. @@ -459,54 +459,54 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) return false; - // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = - IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - if (ValLR == IntB.end()) + // Get the Segment in IntB that this value number starts with. 
+ LiveInterval::iterator ValS = + IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); + if (ValS == IntB.end()) return false; - // Make sure that the end of the live range is inside the same block as + // Make sure that the end of the live segment is inside the same block as // CopyMI. - MachineInstr *ValLREndInst = - LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + MachineInstr *ValSEndInst = + LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); + if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) return false; - // Okay, we now know that ValLR ends in the same block that the CopyMI - // live-range starts. If there are no intervening live ranges between them in - // IntB, we can merge them. - if (ValLR+1 != BLR) return false; + // Okay, we now know that ValS ends in the same block that the CopyMI + // live-range starts. If there are no intervening live segments between them + // in IntB, we can merge them. + if (ValS+1 != BS) return false; DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); - SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the valnum with the new defining // instruction #. BValNo->def = FillerStart; // Okay, we can merge them. We need to insert a new liverange: - // [ValLR.end, BLR.begin) of either value number, then we merge the + // [ValS.end, BS.begin) of either value number, then we merge the // two value numbers. - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) - IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (BValNo != ValS->valno) + IntB.MergeValueNumberInto(BValNo, ValS->valno); DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { - ValLREndInst->getOperand(UIdx).setIsKill(false); + ValSEndInst->getOperand(UIdx).setIsKill(false); } // Rewrite the copy. If the copy instruction was killing the destination // register before the merge, find the last use and trim the live range. That // will also add the isKill marker. 
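The transformation this function performs is easiest to see as a picture; a sketch in terms of the locals defined here, with the calls taken verbatim from the hunk above:

  // Before: IntB has two segments with a hole that CopyMI sits in:
  //   [a, ValS->end)   ...hole...   [BS->start, b)
  // After: the hole is bridged and both pieces share one value number:
  //   [a, b)
  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
  IntB.MergeValueNumberInto(BValNo, ValS->valno);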
CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); - if (ALR->end == CopyIdx) + if (AS->end == CopyIdx) LIS->shrinkToUses(&IntA); ++numExtends; @@ -527,11 +527,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - LiveInterval::Ranges::iterator BI = - std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); - if (BI != IntB.ranges.begin()) + LiveInterval::iterator BI = + std::upper_bound(IntB.begin(), IntB.end(), AI->start); + if (BI != IntB.begin()) --BI; - for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { + for (; BI != IntB.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; if (BI->start <= AI->start && BI->end > AI->start) @@ -577,14 +577,12 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); if (!BValNo || BValNo->def != CopyIdx) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); @@ -614,7 +612,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) + if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) return false; // Make sure there are no other definitions of IntB that would reach the @@ -629,8 +627,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) continue; // If this use is tied to a def, we can't rewrite the register. if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) @@ -681,8 +679,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; } SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); @@ -712,14 +710,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UseMI->eraseFromParent(); } - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. 
VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo)); } DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); @@ -744,7 +742,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, LiveInterval &SrcInt = LIS->getInterval(SrcReg); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); - VNInfo *ValNo = LiveRangeQuery(SrcInt, CopyIdx).valueIn(); + VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); assert(ValNo && "CopyMI input register not live"); if (ValNo->isPHIDef() || ValNo->isUnused()) return false; @@ -876,8 +874,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { unsigned Reg = NewMIImplDefs[i]; for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) - LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } DEBUG(dbgs() << "Remat: " << *NewMI); @@ -1048,7 +1046,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); - LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI)); if (VNInfo *DefVNI = LRQ.valueDefined()) { VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no <undef> flag."); @@ -1091,8 +1089,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); // When possible, let DstReg be the larger interval. - if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > - LIS->getInterval(CP.getDstReg()).ranges.size()) + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > + LIS->getInterval(CP.getDstReg()).size()) CP.flip(); } @@ -1109,7 +1107,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; - // If we can eliminate the copy without merging the live ranges, do so now. + // If we can eliminate the copy without merging the live segments, do so + // now. if (!CP.isPartial() && !CP.isPhys()) { if (adjustCopiesBackFrom(CP, CopyMI) || removeCopyByCommutingDef(CP, CopyMI)) { @@ -1157,10 +1156,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({ - dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); - if (!CP.isPhys()) + dbgs() << "\tJoined. Result = "; + if (CP.isPhys()) + dbgs() << PrintReg(CP.getDstReg(), TRI); + else dbgs() << LIS->getInterval(CP.getDstReg()); - dbgs() << '\n'; + dbgs() << '\n'; }); ++numJoins; @@ -1172,8 +1173,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << '\n'); + DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(CP.isFlipped() && RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1442,7 +1442,7 @@ VNInfo *JoinVals::stripCopies(VNInfo *VNI) { unsigned Reg = MI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) break; - LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); + LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def); if (!LRQ.valueIn()) break; VNI = LRQ.valueIn(); @@ -1493,7 +1493,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // The <read-undef> flag on the def operand means that old lane values are // not important. if (Redef) { - V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); + V.RedefVNI = LI.Query(VNI->def).valueIn(); assert(V.RedefVNI && "Instruction is reading nonexistent value"); computeAssignment(V.RedefVNI->id, Other); V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; @@ -1510,7 +1510,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } // Find the value in Other that overlaps VNI->def, if any. - LiveRangeQuery OtherLRQ(Other.LI, VNI->def); + LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def); // It is possible that both values are defined by the same instruction, or // the values are PHIs defined in the same block. When that happens, the two @@ -1969,8 +1969,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS + DEBUG(dbgs() << "\t\tRHS = " << RHS + << "\n\t\tLHS = " << LHS << '\n'); // First compute NewVNInfo and the simple value mappings. @@ -2001,8 +2001,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); // Join RHS into LHS. - LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, - MRI); + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); // Kill flags are going to be wrong if the live ranges were overlapping. // Eventually, we should simply clear all kill flags when computing live @@ -2017,7 +2016,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // CR_Replace conflicts. DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LHS << '\n'); - LIS->extendToIndices(&LHS, EndPoints); + LIS->extendToIndices(LHS, EndPoints); return true; } @@ -2043,9 +2042,8 @@ struct MBBPriorityInfo { // block (the unsigned), and then on the MBB number. // // EnableGlobalCopies assumes that the primary sort key is loop depth.
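Given the key order spelled out above, the hunk below drops the void* qsort shim in favor of a typed comparator. A comparator of this shape plugs straight into llvm::array_pod_sort; the call site is outside this excerpt, so the following line is an assumed usage sketch, not part of the patch:

  // Hypothetical call site: visit deeper loops first, then the later sort keys.
  array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);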
-static int compareMBBPriority(const void *L, const void *R) { - const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L); - const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R); +static int compareMBBPriority(const MBBPriorityInfo *LHS, + const MBBPriorityInfo *RHS) { // Deeper loops first if (LHS->Depth != RHS->Depth) return LHS->Depth > RHS->Depth ? -1 : 1; @@ -2203,7 +2201,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) - JoinGlobalCopies = ST.enableMachineScheduler(); + JoinGlobalCopies = ST.useMachineScheduler(); else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index b7ab138..092ecdd 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -25,53 +25,19 @@ using namespace llvm; /// Increase pressure for each pressure set provided by TargetRegisterInfo. static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, - std::vector<unsigned> &MaxSetPressure, - const int *PSet, unsigned Weight) { - for (; *PSet != -1; ++PSet) { - CurrSetPressure[*PSet] += Weight; - if (&CurrSetPressure != &MaxSetPressure - && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) { - MaxSetPressure[*PSet] = CurrSetPressure[*PSet]; - } - } + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) + CurrSetPressure[*PSetI] += Weight; } /// Decrease pressure for each pressure set provided by TargetRegisterInfo. static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, - const int *PSet, unsigned Weight) { - for (; *PSet != -1; ++PSet) { - assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow"); - CurrSetPressure[*PSet] -= Weight; - } -} - -/// Directly increase pressure only within this RegisterPressure result. -void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - increaseSetPressure(MaxSetPressure, MaxSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - increaseSetPressure(MaxSetPressure, MaxSetPressure, - TRI->getRegUnitPressureSets(Reg), - TRI->getRegUnitWeight(Reg)); - } -} - -/// Directly decrease pressure only within this RegisterPressure result. -void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg), - TRI->getRegUnitWeight(Reg)); + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSetI] -= Weight; } } @@ -113,36 +79,23 @@ void RegPressureTracker::dump() const { /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. 
-void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) { - if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { - const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getRegUnitPressureSets(Regs[I]), - TRI->getRegUnitWeight(Regs[I])); +void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]); + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + CurrSetPressure[*PSetI] += Weight; + if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) { + P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI]; + } } } } /// Simply decrease the current pressure as impacted by these registers. -void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) { - if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { - const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); - decreaseSetPressure(CurrSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]), - TRI->getRegUnitWeight(Regs[I])); - } - } +void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) { + for (unsigned I = 0, E = RegUnits.size(); I != E; ++I) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I])); } /// Clear the result so it can be used for another round of pressure tracking. @@ -194,12 +147,30 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } -const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const { +const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) return &LIS->getInterval(Reg); return LIS->getCachedRegUnit(Reg); } +void RegPressureTracker::reset() { + MBB = 0; + LIS = 0; + + CurrSetPressure.clear(); + LiveThruPressure.clear(); + P.MaxSetPressure.clear(); + + if (RequireIntervals) + static_cast<IntervalPressure&>(P).reset(); + else + static_cast<RegionPressure&>(P).reset(); + + LiveRegs.PhysRegs.clear(); + LiveRegs.VirtRegs.clear(); + UntiedDefs.clear(); +} + /// Setup the RegPressureTracker. /// /// TODO: Add support for pressure without LiveIntervals. 
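The reset() introduced above makes one tracker object reusable across regions, with init() now doing the wipe implicitly. A usage sketch under that assumption; the constructor shape and the full init() argument list are inferred from what this excerpt shows:

  IntervalPressure Pressure;
  RegPressureTracker RPTracker(Pressure);   // assumed constructor shape
  RPTracker.init(MF, RCI, LIS, MBB, MBB->end(), /*ShouldTrackUntiedDefs=*/false);
  while (RPTracker.getPos() != MBB->begin())
    RPTracker.recede(/*LiveUses=*/0, /*PDiff=*/0);  // bottom-up pressure walk
  // A second init() on the same object now starts from a clean slate.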
@@ -210,6 +181,8 @@ void RegPressureTracker::init(const MachineFunction *mf, MachineBasicBlock::const_iterator pos, bool ShouldTrackUntiedDefs) { + reset(); + MF = mf; TRI = MF->getTarget().getRegisterInfo(); RCI = rci; @@ -224,19 +197,11 @@ void RegPressureTracker::init(const MachineFunction *mf, CurrPos = pos; CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); - LiveThruPressure.clear(); - if (RequireIntervals) - static_cast<IntervalPressure&>(P).reset(); - else - static_cast<RegionPressure&>(P).reset(); P.MaxSetPressure = CurrSetPressure; - LiveRegs.PhysRegs.clear(); LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); - LiveRegs.VirtRegs.clear(); LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); - UntiedDefs.clear(); if (TrackUntiedDefs) UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } @@ -328,24 +293,25 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { unsigned Reg = P.LiveOutRegs[i]; if (TargetRegisterInfo::isVirtualRegister(Reg) && !RPTracker.hasUntiedDef(Reg)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - increaseSetPressure(LiveThruPressure, LiveThruPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); + increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); } } } /// \brief Convenient wrapper for checking membership in RegisterOperands. -static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) { - return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); +/// (std::count() doesn't have an early exit). +static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { + return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); } /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers class RegisterOperands { const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; + bool IgnoreDead; public: SmallVector<unsigned, 8> Uses; @@ -353,7 +319,8 @@ public: SmallVector<unsigned, 8> DeadDefs; RegisterOperands(const TargetRegisterInfo *tri, - const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {} + const MachineRegisterInfo *mri, bool ID = false): + TRI(tri), MRI(mri), IgnoreDead(ID) {} /// Push this operand's register onto the correct vector. void collect(const MachineOperand &MO) { @@ -362,25 +329,27 @@ public: if (MO.readsReg()) pushRegUnits(MO.getReg(), Uses); if (MO.isDef()) { - if (MO.isDead()) - pushRegUnits(MO.getReg(), DeadDefs); + if (MO.isDead()) { + if (!IgnoreDead) + pushRegUnits(MO.getReg(), DeadDefs); + } else pushRegUnits(MO.getReg(), Defs); } } protected: - void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) { + void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (containsReg(Regs, Reg)) + if (containsReg(RegUnits, Reg)) return; - Regs.push_back(Reg); + RegUnits.push_back(Reg); } else if (MRI->isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (containsReg(Regs, *Units)) + if (containsReg(RegUnits, *Units)) continue; - Regs.push_back(*Units); + RegUnits.push_back(*Units); } } } @@ -399,6 +368,56 @@ static void collectOperands(const MachineInstr *MI, RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); } +/// Initialize an array of N PressureDiffs. 
+void PressureDiffs::init(unsigned N) { + Size = N; + if (N <= Max) { + memset(PDiffArray, 0, N * sizeof(PressureDiff)); + return; + } + Max = Size; + free(PDiffArray); + PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); +} + +/// Add a change in pressure to the pressure diff of a given instruction. +void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, + const MachineRegisterInfo *MRI) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + // Find an existing entry in the pressure diff for this PSet. + PressureDiff::iterator I = begin(), E = end(); + for (; I != E && I->isValid(); ++I) { + if (I->getPSet() >= *PSetI) + break; + } + // If all pressure sets are more constrained, skip the remaining PSets. + if (I == E) + break; + // Insert this PressureChange. + if (!I->isValid() || I->getPSet() != *PSetI) { + PressureChange PTmp = PressureChange(*PSetI); + for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J) + std::swap(*J,PTmp); + } + // Update the units for this pressure set. + I->setUnitInc(I->getUnitInc() + Weight); + } +} + +/// Record the pressure difference induced by the given operand list. +static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, + const MachineRegisterInfo *MRI) { + assert(!PDiff.begin()->isValid() && "stale PDiff"); + + for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i) + PDiff.addPressureChange(RegOpers.Defs[i], true, MRI); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i) + PDiff.addPressureChange(RegOpers.Uses[i], false, MRI); +} + /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -415,7 +434,7 @@ void RegPressureTracker::discoverLiveIn(unsigned Reg) { // At live in discovery, unconditionally increase the high water mark. P.LiveInRegs.push_back(Reg); - P.increase(Reg, TRI, MRI); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); } /// Add Reg to the live out set and increase max pressure. @@ -426,11 +445,16 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { // At live out discovery, unconditionally increase the high water mark. P.LiveOutRegs.push_back(Reg); - P.increase(Reg, TRI, MRI); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); } -/// Recede across the previous instruction. -bool RegPressureTracker::recede() { +/// Recede across the previous instruction. If LiveUses is provided, record any +/// RegUnits that are made live by the current instruction's uses. This includes +/// registers that are both defined and used by the instruction. If a pressure +/// difference pointer is provided, record the changes in pressure caused by this +/// instruction independent of liveness. +bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, + PressureDiff *PDiff) { // Check for the top of the analyzable region. if (CurrPos == MBB->begin()) { closeRegion(); @@ -463,6 +487,9 @@ bool RegPressureTracker::recede() { RegisterOperands RegOpers(TRI, MRI); collectOperands(CurrPos, RegOpers); + if (PDiff) + collectPDiff(*PDiff, RegOpers, MRI); + // Boost pressure for all dead defs together. increaseRegPressure(RegOpers.DeadDefs); decreaseRegPressure(RegOpers.DeadDefs); @@ -471,10 +498,20 @@ bool RegPressureTracker::recede() { // Kill liveness at live defs. // TODO: consider earlyclobbers?
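A small worked example of the sorted insert in addPressureChange() above, with invented pressure-set IDs and a unit weight:

  // PDiff entries are kept sorted by PSet id: {2:+1} {7:-1} {invalid}...
  // addPressureChange(RegUnit, /*IsDec=*/false) for a RegUnit in sets {5, 7}:
  //   PSet 5: no entry yet, so {7:-1} is shifted right by the swap loop,
  //           giving {2:+1} {5:+1} {7:-1}
  //   PSet 7: an entry exists, so setUnitInc(-1 + 1) leaves {7:0}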
for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); + bool DeadDef = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + DeadDef = LRQ.isDeadDef(); + } + } + if (!DeadDef) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); + } } // Generate liveness for uses. @@ -483,12 +520,17 @@ bool RegPressureTracker::recede() { if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveInterval *LI = getInterval(Reg); - if (LI && !LI->killedAt(SlotIdx)) - discoverLiveOut(Reg); + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (!LRQ.isKill() && !LRQ.valueDefined()) + discoverLiveOut(Reg); + } } increaseRegPressure(Reg); LiveRegs.insert(Reg); + if (LiveUses && !containsReg(*LiveUses, Reg)) + LiveUses->push_back(Reg); } } if (TrackUntiedDefs) { @@ -537,8 +579,8 @@ bool RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveInterval *LI = getInterval(Reg); - lastUse = LI && LI->killedAt(SlotIdx); + const LiveRange *LR = getLiveRange(Reg); + lastUse = LR && LR->Query(SlotIdx).isKill(); } else { // Allocatable physregs are always single-use before register rewriting. @@ -576,8 +618,7 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, RegPressureDelta &Delta, const RegisterClassInfo *RCI, ArrayRef<unsigned> LiveThruPressureVec) { - int ExcessUnits = 0; - unsigned PSetID = ~0U; + Delta.Excess = PressureChange(); for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { unsigned POld = OldPressureVec[i]; unsigned PNew = NewPressureVec[i]; @@ -599,13 +640,11 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, PDiff = Limit - POld; // Just obeyed limit. if (PDiff) { - ExcessUnits = PDiff; - PSetID = i; + Delta.Excess = PressureChange(i); + Delta.Excess.setUnitInc(PDiff); break; } } - Delta.Excess.PSetID = PSetID; - Delta.Excess.UnitIncrease = ExcessUnits; } /// Find the max change in max pressure that either surpasses a critical PSet @@ -616,11 +655,11 @@ static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, /// RegPressureTracker API change to work with pressure differences. 
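For reference while reading the delta computations below, the three fields a RegPressureDelta can carry, illustrated with invented numbers for one pressure set S:

  // Limit(S) = 10 (including live-through), POld = 9, PNew = 12:
  //   Delta.Excess      = PressureChange(S) with UnitInc = PNew - Limit = 2
  // S appears in CriticalPSets with a recorded increase of 11, new max 12:
  //   Delta.CriticalMax = PressureChange(S) with UnitInc = 12 - 11 = 1
  // MaxPressureLimit[S] = 11 and the new max exceeds it:
  //   Delta.CurrentMax  = PressureChange(S) with a positive UnitInc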
static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, ArrayRef<unsigned> NewMaxPressureVec, - ArrayRef<PressureElement> CriticalPSets, + ArrayRef<PressureChange> CriticalPSets, ArrayRef<unsigned> MaxPressureLimit, RegPressureDelta &Delta) { - Delta.CriticalMax = PressureElement(); - Delta.CurrentMax = PressureElement(); + Delta.CriticalMax = PressureChange(); + Delta.CurrentMax = PressureChange(); unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) { @@ -630,27 +669,24 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, continue; if (!Delta.CriticalMax.isValid()) { - while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i) ++CritIdx; - if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { - int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc(); if (PDiff > 0) { - Delta.CriticalMax.PSetID = i; - Delta.CriticalMax.UnitIncrease = PDiff; + Delta.CriticalMax = PressureChange(i); + Delta.CriticalMax.setUnitInc(PDiff); } } } // Find the first increase above MaxPressureLimit. // (Ignores negative MDiff). - if (!Delta.CurrentMax.isValid()) { - int MDiff = (int)PNew - (int)MaxPressureLimit[i]; - if (MDiff > 0) { - Delta.CurrentMax.PSetID = i; - Delta.CurrentMax.UnitIncrease = MDiff; - if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) - break; - } + if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) { + Delta.CurrentMax = PressureChange(i); + Delta.CurrentMax.setUnitInc(PNew - POld); + if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) + break; } } } @@ -665,7 +701,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers(TRI, MRI); + RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true); collectOperands(MI, RegOpers); // Boost max pressure for all dead defs together. @@ -676,8 +712,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Kill liveness at live defs. for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); + bool DeadDef = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI); + LiveQueryResult LRQ = LR->Query(SlotIdx); + DeadDef = LRQ.isDeadDef(); + } + } + if (!DeadDef) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); + } } // Generate liveness for uses. for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { @@ -699,8 +746,9 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { /// result per-SUnit with enough information to adjust for the current /// scheduling position. But this works as a proof of concept. void RegPressureTracker:: -getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, - ArrayRef<PressureElement> CriticalPSets, +getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, + RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, ArrayRef<unsigned> MaxPressureLimit) { // Snapshot Pressure. 
// FIXME: The snapshot heap space should persist. But I'm planning to @@ -714,12 +762,113 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); - assert(Delta.CriticalMax.UnitIncrease >= 0 && - Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); // Restore the tracker's state. P.MaxSetPressure.swap(SavedMaxPressure); CurrSetPressure.swap(SavedPressure); + +#ifndef NDEBUG + if (!PDiff) + return; + + // Check if the alternate algorithm yields the same result. + RegPressureDelta Delta2; + getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit); + if (Delta != Delta2) { + dbgs() << "DELTA: " << *MI; + if (Delta.Excess.isValid()) + dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet()) + << " " << Delta.Excess.getUnitInc() << "\n"; + if (Delta.CriticalMax.isValid()) + dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet()) + << " " << Delta.CriticalMax.getUnitInc() << "\n"; + if (Delta.CurrentMax.isValid()) + dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet()) + << " " << Delta.CurrentMax.getUnitInc() << "\n"; + if (Delta2.Excess.isValid()) + dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet()) + << " " << Delta2.Excess.getUnitInc() << "\n"; + if (Delta2.CriticalMax.isValid()) + dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet()) + << " " << Delta2.CriticalMax.getUnitInc() << "\n"; + if (Delta2.CurrentMax.isValid()) + dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet()) + << " " << Delta2.CurrentMax.getUnitInc() << "\n"; + llvm_unreachable("RegP Delta Mismatch"); + } +#endif +} + +/// This is a prototype of the fast version of querying register pressure that +/// does not directly depend on current liveness. It's still slow because we +/// recompute pressure change on-the-fly. This implementation only exists to +/// prove correctness. +/// +/// @param Delta captures information needed for heuristics. +/// +/// @param CriticalPSets Are the pressure sets that are known to exceed some +/// limit within the region, not necessarily at the current position. +/// +/// @param MaxPressureLimit Is the max pressure within the region, not +/// necessarily at the current position. +void RegPressureTracker:: +getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, + RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) const { + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (PressureDiff::const_iterator + PDiffI = PDiff.begin(), PDiffE = PDiff.end(); + PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) { + + unsigned PSetID = PDiffI->getPSet(); + unsigned Limit = RCI->getRegPressureSetLimit(PSetID); + if (!LiveThruPressure.empty()) + Limit += LiveThruPressure[PSetID]; + + unsigned POld = CurrSetPressure[PSetID]; + unsigned MOld = P.MaxSetPressure[PSetID]; + unsigned MNew = MOld; + // Ignore DeadDefs here because they aren't captured by PressureChange. + unsigned PNew = POld + PDiffI->getUnitInc(); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow"); + if (PNew > MOld) + MNew = PNew; + // Check if current pressure has exceeded the limit. 
+ if (!Delta.Excess.isValid()) { + unsigned ExcessInc = 0; + if (PNew > Limit) + ExcessInc = POld > Limit ? PNew - POld : PNew - Limit; + else if (POld > Limit) + ExcessInc = Limit - POld; + if (ExcessInc) { + Delta.Excess = PressureChange(PSetID); + Delta.Excess.setUnitInc(ExcessInc); + } + } + // Check if max pressure has exceeded a critical pressure set max. + if (MNew == MOld) + continue; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) { + int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (CritInc > 0 && CritInc <= INT16_MAX) { + Delta.CriticalMax = PressureChange(PSetID); + Delta.CriticalMax.setUnitInc(CritInc); + } + } + } + // Check if max pressure has exceeded the current max. + if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) { + Delta.CurrentMax = PressureChange(PSetID); + Delta.CurrentMax.setUnitInc(MNew - MOld); + } + } } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). @@ -765,10 +914,12 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveInterval *LI = getInterval(Reg); - if (LI && LI->killedAt(SlotIdx) - && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { - decreaseRegPressure(Reg); + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + decreaseRegPressure(Reg); + } } } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -793,7 +944,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// This assumes that the current LiveIn set is sufficient. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, - ArrayRef<PressureElement> CriticalPSets, + ArrayRef<PressureChange> CriticalPSets, ArrayRef<unsigned> MaxPressureLimit) { // Snapshot Pressure. std::vector<unsigned> SavedPressure = CurrSetPressure; @@ -805,8 +956,8 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); - assert(Delta.CriticalMax.UnitIncrease >= 0 && - Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); // Restore the tracker's state. 
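Both max-delta queries share one shape: snapshot the pressure vectors, simulate the instruction, diff, then swap the snapshots back so the query has no side effects. In outline, using lines visible in this hunk:

  std::vector<unsigned> SavedPressure = CurrSetPressure;      // snapshot
  std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
  bumpDownwardPressure(MI);                                   // simulate MI
  computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
                             LiveThruPressure);               // read the change
  P.MaxSetPressure.swap(SavedMaxPressure);                    // restore
  CurrSetPressure.swap(SavedPressure);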
P.MaxSetPressure.swap(SavedMaxPressure); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 892903c..7f1f9c4 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -36,6 +36,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <queue> + using namespace llvm; static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, @@ -178,14 +180,11 @@ void ScheduleDAGInstrs::finishBlock() { void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { + unsigned regioninstrs) { assert(bb == BB && "startBlock should set BB"); RegionBegin = begin; RegionEnd = end; - EndIndex = endcount; - MISUnitMap.clear(); - - ScheduleDAG::clearDAG(); + NumRegionInstrs = regioninstrs; } /// Close the current scheduling region. Don't clear any state in case the @@ -405,9 +404,19 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); + // Record this local VReg use. + VReg2UseMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, SU)); + // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + LiveQueryResult LRQ + = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); VNInfo *VNI = LRQ.valueIn(); // VNI will be valid because MachineOperand::readsReg() is checked by caller. @@ -635,8 +644,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but remember the rejected node. - if (!EnableAASchedMI || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -664,7 +672,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, void ScheduleDAGInstrs::initSUnits() { // We'll be allocating one SUnit for each real instruction in the region, // which is contained within a basic block. - SUnits.reserve(BB->size()); + SUnits.reserve(NumRegionInstrs); for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { MachineInstr *MI = I; @@ -686,10 +694,22 @@ void ScheduleDAGInstrs::initSUnits() { /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, - RegPressureTracker *RPTracker) { + RegPressureTracker *RPTracker, + PressureDiffs *PDiffs) { + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI + : ST.useAA(); + AliasAnalysis *AAForDep = UseAA ? AA : 0; + + MISUnitMap.clear(); + ScheduleDAG::clearDAG(); + // Create an SUnit for each real instruction. initSUnits(); + if (PDiffs) + PDiffs->init(SUnits.size()); + // We build scheduling units by walking a block's instruction list from bottom // to top.
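The PDiffs->init(SUnits.size()) call above sizes one PressureDiff per node, and the per-instruction step of the walk pairs them up, as the next hunk spells out:

  SUnit *SU = MISUnitMap[MI];
  PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
  RPTracker->recede(/*LiveUses=*/0, PDiff);   // records MI's pressure change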
@@ -715,10 +735,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); - // FIXME: Allow SparseSet to reserve space for the creation of virtual - // registers during scheduling. Don't artificially inflate the Universe - // because we want to assert that vregs are not created during DAG building. + VRegUses.clear(); VRegDefs.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(MRI.getNumVirtRegs()); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -738,17 +757,18 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, DbgMI = MI; continue; } + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + if (RPTracker) { - RPTracker->recede(); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; + RPTracker->recede(/*LiveUses=*/0, PDiff); assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); - SUnit *SU = MISUnitMap[MI]; - assert(SU && "No SUnit mapped to this MI"); - // Add register-based dependencies (data, anti, and output). bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -826,20 +846,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -872,7 +892,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector<const Value *, SUnit *>::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); I->second = SU; } else { if (ThisMayAlias) @@ -887,7 +908,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -896,11 +917,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. 
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); // But we also should check dependent instructions for the // SU in question. adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -930,7 +951,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // potentially aliasing stores. for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -952,7 +973,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector<const Value *, SUnit *>::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -962,7 +984,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cb88941..43f72c5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> @@ -53,6 +56,14 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Include global information in alias analysis")); + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. + static cl::opt<bool> + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -62,6 +73,7 @@ namespace { CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; + bool ForCodeSize; // Worklist of all of the nodes that need to be simplified. 
// @@ -144,6 +156,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); @@ -283,11 +296,11 @@ namespace { /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, + bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; @@ -299,7 +312,7 @@ namespace { /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlignment, const MDNode *&TBAAInfo) const; @@ -315,8 +328,15 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + } /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -329,7 +349,8 @@ namespace { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) : TLI.getPointerTy(); + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) + : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -744,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); @@ -967,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; @@ -1017,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // try and combine it. while (!WorkListContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // The WorkListOrder holds the SDNodes in order, but it may contain + // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check that the node we want to visit should actually be visited.
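That pop-and-filter discipline is a standard way to combine ordered visitation with cheap duplicate elimination. A sketch of how the loop body presumably proceeds from here (the combiner's actual code may differ in detail):

  // Pop in order; a node absent from the set is a stale duplicate.
  N = WorkListOrder.pop_back_val();
  if (!WorkListContents.erase(N))
    continue;   // already visited via an earlier copy
  // ... otherwise visit N and push any newly interesting nodes ...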
@@ -1617,19 +1635,8 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, VT); - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. - EVT ElemTy = VT.getVectorElementType(); - if (LegalTypes && TLI.getTypeAction(*DAG.getContext(), ElemTy) == - TargetLowering::TypePromoteInteger) - ElemTy = TLI.getTypeToTransformTo(*DAG.getContext(), ElemTy); - assert((!LegalTypes || TLI.isTypeLegal(ElemTy)) && - "Type for zero vector elements is not legal"); - SDValue El = DAG.getConstant(0, ElemTy); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return DAG.getConstant(0, VT); return SDValue(); } @@ -1771,8 +1778,8 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are -/// all the same constant or undefined. +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); if (!C) @@ -1808,9 +1815,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0; - ConstValue0 = N0IsConst? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() : APInt(); + ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() + : APInt(); N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0; - ConstValue1 = N1IsConst? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() : APInt(); + ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() + : APInt(); } // fold (mul c1, c2) -> c1*c2 @@ -1823,20 +1832,24 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. + bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1) + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnesValue()) return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2()) + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2()) { + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
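The power-of-two multiply folds above are plain modular-arithmetic identities; the new IsFullSplat guard only ensures the splat constant carries the full scalar width before logBase2() is consulted. A minimal stand-alone check of the scalar identities (hand-written illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // x * (1 << c) == x << c, and x * -(1 << c) == -(x << c),
      // exactly, thanks to unsigned (two's complement) wraparound.
      for (uint32_t x = 0; x < 4096; x += 7) {
        for (unsigned c = 0; c < 31; ++c) {
          uint32_t k = 1u << c;
          assert(x * k == (x << c));
          assert(x * (0u - k) == (0u - (x << c)));
        }
      }
      return 0;
    }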
@@ -2675,6 +2688,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && + cast<ConstantSDNode>(RR)->isAllOnesValue()) || + (cast<ConstantSDNode>(LR)->isAllOnesValue() && + cast<ConstantSDNode>(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorkList(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(N), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); @@ -2718,9 +2744,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2739,11 +2763,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2773,10 +2794,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getChain(), LN0->getBasePtr(), ExtVT, + LN0->getMemOperand()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2812,7 +2831,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment); + Alignment, LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2848,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return BSwap; + } + return SDValue(); } @@ -2932,13 +2959,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (N00 != N10) return SDValue(); - // Make sure everything beyond the low halfword is zero since the SRL 16 - // will clear the top bits. + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. 
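The new visitAND fold above rests on a small unsigned-range identity: X is neither 0 nor -1 exactly when X + 1, taken modulo 2^n, is at least 2. A quick exhaustive check at 16 bits (illustrative sketch, not taken from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t i = 0; i <= 0xFFFF; ++i) {
        uint16_t X = static_cast<uint16_t>(i);
        bool Original = (X != 0) && (X != 0xFFFF);        // (and (setne X, 0), (setne X, -1))
        bool Folded = static_cast<uint16_t>(X + 1) >= 2;  // (setuge (add X, 1), 2)
        assert(Original == Folded);
      }
      return 0;
    }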
unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16 && - (!LookPassAnd0 || !LookPassAnd1) && - !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) - return SDValue(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); + + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. + if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) @@ -3078,7 +3115,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, SDValue(Parts[0],0)); - // Result of the bswap should be rotated by 16. If it's not legal, than + // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) @@ -3343,29 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (LHSMask.getNode() || RHSMask.getNode()) return 0; - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) - if (RHSShiftAmt.getOpcode() == ISD::SUB && - LHSShiftAmt == RHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) - if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - - // Look for sign/zext/any-extended or truncate cases: + // If the shift amount is sign/zext/any-extended just peel it off. 
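MatchRotate, both before and after this restructuring, keys on the (sub OpSizeInBits, y) pattern: for a 32-bit value and a shift amount y in 1..31, (x << y) | (x >> (32 - y)) is exactly a left rotate. A spot-check against an independent 64-bit formulation (illustrative sketch only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t x = 0xDEADBEEF;
      for (unsigned y = 1; y < 32; ++y) {  // y == 0 would shift by the full width
        uint32_t Folded = (x << y) | (x >> (32 - y));
        // Reference rotate: duplicate x into 64 bits and slide a 32-bit window.
        uint64_t Doubled = (static_cast<uint64_t>(x) << 32) | x;
        uint32_t Ref = static_cast<uint32_t>(Doubled >> (32 - y));
        assert(Folded == Ref);
      }
      return 0;
    }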
+ SDValue LExtOp0 = LHSShiftAmt; + SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || @@ -3374,33 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { - SDValue LExtOp0 = LHSShiftAmt.getOperand(0); - SDValue RExtOp0 = RHSShiftAmt.getOperand(0); - if (RExtOp0.getOpcode() == ISD::SUB && - RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = + LExtOp0 = LHSShiftAmt.getOperand(0); + RExtOp0 = RHSShiftAmt.getOperand(0); + } + + if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, - LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); } return 0; @@ -3620,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (shl c1, c2) -> c1<<c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); @@ -3697,6 +3718,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. 
+ if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -3750,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); @@ -3895,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); @@ -4217,6 +4271,23 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +static +std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the inputs. + SDValue Lo, Hi, LL, LH, RL, RH; + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + + return std::make_pair(Lo, Hi); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4254,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + // If the VSELECT result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (N0.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; + llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + + Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); + Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); + + // Add the new VSELECT nodes to the work list in case they need to be split + // again. 
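The splitting in visitVSELECT is sound because a vector select is purely lane-wise: selecting across 2N lanes equals concatenating two N-lane selects over the split mask and operands. Modeled on plain arrays (illustrative sketch, not from the patch):

    #include <array>
    #include <cassert>

    int main() {
      std::array<bool, 8> CC = {true, false, true, true, false, false, true, false};
      std::array<int, 8> L = {1, 2, 3, 4, 5, 6, 7, 8};
      std::array<int, 8> R = {10, 20, 30, 40, 50, 60, 70, 80};
      std::array<int, 8> Whole, Halves;
      for (int i = 0; i < 8; ++i) Whole[i] = CC[i] ? L[i] : R[i];   // one wide select
      for (int i = 0; i < 4; ++i) Halves[i] = CC[i] ? L[i] : R[i];  // Lo half
      for (int i = 4; i < 8; ++i) Halves[i] = CC[i] ? L[i] : R[i];  // Hi half
      assert(Whole == Halves);  // concat(Lo, Hi) matches the unsplit result
      return 0;
    }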
+ AddToWorkList(Lo.getNode()); + AddToWorkList(Hi.getNode()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + return SDValue(); } @@ -4469,10 +4568,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -4493,10 +4590,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), @@ -4524,11 +4619,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, @@ -4593,9 +4685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { return DAG.getSelect(SDLoc(N), VT, DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); } } @@ -4762,10 +4854,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -4795,11 +4885,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, @@ -4826,10 +4913,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), @@ -4992,10 +5077,8 @@ SDValue 
DAGCombiner::visitANY_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); @@ -5016,9 +5099,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, SDLoc(N0), @@ -5250,12 +5331,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign); + LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); else Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); + NewAlign, LN0->getTBAAInfo()); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -5353,10 +5434,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorkList(ExtLoad.getNode()); @@ -5371,10 +5450,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -5657,7 +5734,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. 
!cast<LoadSDNode>(N0)->isVolatile() && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); @@ -5667,7 +5745,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign); + LN0->isInvariant(), OrigAlign, + LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, SDLoc(N0), @@ -6652,16 +6731,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), @@ -7451,13 +7528,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align, + LD->getTBAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7468,17 +7548,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), - BetterChain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + BetterChain, Ptr, LD->getMemOperand()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), - BetterChain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->getAlignment()); + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); } // Create token factor to keep old chain connected. @@ -7498,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + return SDValue(); } +namespace { +/// \brief Helper structure used to slice a load in smaller loads. 
+/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewritten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various costs. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. + Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provides a few gains, like removing a shift or a + /// truncate. This method allows growing the cost of the original + /// load with the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getValueType(0), + LS.Inst->getOperand(0).getValueType())) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. + if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + } + + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register bank copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operations first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represents the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load.
+ unsigned Shift; + // The DAG from which Origin came from. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, + unsigned Shift = 0, SelectionDAG *DAG = NULL) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + LoadedSlice(const LoadedSlice &LS) + : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + } + + /// \brief Get the alignment of the load used for this slice. + unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed load only, we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != NULL. 
+ uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = + DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized earlier in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianness. + int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, + DAG->getConstant(Offset, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. + SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zeros. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + // Assume bitcasts are cheap, unless both register classes do not + // explicitly share a common sub class.
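The offset arithmetic in getOffsetFromBase and the rewrite in loadSlice above encode a byte-level equivalence: on a little-endian layout, trunc(load) and trunc(lshr(load, Shift)) read the bytes at offsets 0 and Shift/8 respectively, while big-endian mirrors the offset, hence the TySizeInBytes - Offset - getLoadedSize() adjustment. A host-level sketch of the little-endian case (illustrative, assumes a little-endian host):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      unsigned char Mem[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
      uint64_t Wide;
      std::memcpy(&Wide, Mem, 8);    // Origin = load i64
      uint32_t Lo, Hi;
      std::memcpy(&Lo, Mem + 0, 4);  // slice at byte offset 0      (Shift = 0)
      std::memcpy(&Hi, Mem + 4, 4);  // slice at byte offset 32 / 8 (Shift = 32)
      assert(static_cast<uint32_t>(Wide) == Lo);        // trunc
      assert(static_cast<uint32_t>(Wide >> 32) == Hi);  // trunc(lshr 32)
      return 0;
    }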
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. + if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Sorts LoadedSlice objects according to their offset. +struct LoadedSliceSorter { + bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + } +}; + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +} + +/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. +static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// pairing capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there are in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there are fewer than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. second) potential candidate + // to be placed in a paired load. + const LoadedSlice *First = NULL; + const LoadedSlice *Second = NULL; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them.
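areUsedBitsDense above is asking whether the mask has the shape 0..0 1..1 0..0: strip the trailing zeros and the remainder must be all ones. The same test on a plain 64-bit integer (sketch; __builtin_ctzll assumes a GCC/Clang-style builtin):

    #include <cassert>
    #include <cstdint>

    static bool isDense(uint64_t Mask) {
      if (Mask == 0)
        return false;                   // no used bits at all
      Mask >>= __builtin_ctzll(Mask);   // drop the unused bits on the right
      return (Mask & (Mask + 1)) == 0;  // remaining low bits must all be ones
    }

    int main() {
      assert(isDense(0x00FF0000ULL));   // 0..0 1..1 0..0
      assert(isDense(~0ULL));           // all ones
      assert(!isDense(0x00FF00FFULL));  // hole in the middle: not dense
      return 0;
    }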
+ if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // Move to the next pair; this type is hopeless. + Second = NULL; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. + if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = NULL; + } +} + +/// \brief Check the profitability of all involved LoadedSlices. +/// Currently, it is considered profitable if there are exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model is mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. + OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired loads, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it into the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector<LoadedSlice, 4> LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr).
+ if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa<ConstantSDNode>(User->getOperand(1))) { + Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User is a truncate iff we encountered trunc or + // trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and at least 8 bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted by an amount that is not a multiple of 8 bits, + // the slice will be across several bytes. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return false; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the arguments for the new token factor for all the slices. + SmallVector<SDValue, 8> ArgChains; + for (SmallVectorImpl<LoadedSlice>::const_iterator + LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); + LSIt != LSItEnd; ++LSIt) { + SDValue SliceInst = LSIt->loadSlice(); + CombineTo(LSIt->Inst, SliceInst, true); + if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + SliceInst = SliceInst.getOperand(0); + assert(SliceInst->getOpcode() == ISD::LOAD && + "It takes more than a zext to get to the loaded slice!!"); + ArgChains.push_back(SliceInst.getValue(1)); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, + &ArgChains[0], ArgChains.size()); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + return true; +} + /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the /// load is having specific bytes cleared out. If so, return the byte size /// being masked out and the shift amount. @@ -7735,7 +8363,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign); + LD->isInvariant(), NewAlign, + LD->getTBAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), @@ -7846,17 +8475,28 @@ struct BaseIndexOffset { static BaseIndexOffset match(SDValue Ptr) { bool IsIndexSignExt = false; - // Just Base or possibly anything else. + // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); - // Base + offset. + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET.
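SliceUpLoad's overlap detection above reduces to mask arithmetic: each candidate slice claims Width bits starting at its shift amount, and two slices may coexist only if their masks are disjoint. In scalar form (illustrative sketch only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Two i32 slices of an i64 load: trunc, and trunc(lshr 32).
      uint64_t Used = 0;
      uint64_t Slice0 = 0xFFFFFFFFULL << 0;   // Width 32, Shift 0
      uint64_t Slice1 = 0xFFFFFFFFULL << 32;  // Width 32, Shift 32
      assert((Used & Slice0) == 0); Used |= Slice0;
      assert((Used & Slice1) == 0); Used |= Slice1;
      // A third user reading bits 16..47 would overlap and abort slicing.
      uint64_t Bad = 0xFFFFFFFFULL << 16;
      assert((Used & Bad) != 0);
      return 0;
    }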
if (isa<ConstantSDNode>(Ptr->getOperand(1))) { int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, IsIndexSignExt); } + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); SDValue IndexOffset = Ptr->getOperand(1); @@ -8007,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { Index = STn; break; } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + if (Ldn->isVolatile()) { + Index = NULL; + break; + } + // Save the load node for later. Continue the scan. AliasLoadNodes.push_back(Ldn); NextInChain = Ldn->getChain().getNode(); @@ -8384,7 +9029,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign); + ST->isNonTemporal(), OrigAlign, + ST->getTBAAInfo()); } // Turn 'store undef, Ptr' -> nothing. @@ -8399,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + switch (CFP->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. case MVT::f80: @@ -8412,8 +9058,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + Ptr, ST->getMemOperand()); } break; case MVT::f64: @@ -8423,8 +9068,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
getZExtValue(), MVT::i64); return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + Ptr, ST->getMemOperand()); } if (!ST->isVolatile() && @@ -8440,18 +9084,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment()); + ST->getAlignment(), TBAAInfo); Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -8467,7 +9112,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align, + ST->getTBAAInfo()); } } @@ -8477,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -8488,14 +9136,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ST->getMemoryVT(), ST->getMemOperand()); } else { ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } // Create token to keep both nodes around. @@ -8528,9 +9172,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + Ptr, ST->getMemoryVT(), ST->getMemOperand()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. @@ -8561,9 +9203,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + Ptr, ST->getMemoryVT(), ST->getMemOperand()); } // Only perform this optimization before the types are legal, because we @@ -8821,13 +9461,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); } else { Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + LN0->isInvariant(), Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); @@ -9165,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return N->getOperand(0); // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(N->getValueType(0)); + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where one of the vectors is undef. + if (N->getNumOperands() == 2 && + N->getOperand(1)->getOpcode() == ISD::UNDEF) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). + if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. @@ -9225,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // (extract_subvec (concat V1, V2, ...), i) // Into: // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -9358,10 +10027,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); if (Idx >= 0) { - if (Idx < (int)NumElts) - Idx += NumElts; - else + if (Idx >= (int)NumElts) Idx -= NumElts; + else + Idx = -1; // remove reference to lhs } NewMask.push_back(Idx); } @@ -9738,7 +10407,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->isInvariant(), LLD->getAlignment()); @@ -9747,7 +10416,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. 
LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->getAlignment()); @@ -9852,7 +10521,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); AddToWorkList(CstOffset.getNode()); - CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, @@ -9974,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return Temp; // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); } } @@ -10132,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, +bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; + // If they are both volatile then they cannot be reordered. + if (IsVolatile1 && IsVolatile2) return true; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; @@ -10187,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return false; } - if (CombinerGlobalAA) { + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : + TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; @@ -10206,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { SDValue Ptr0, Ptr1; int64_t Size0, Size1; + bool IsVolatile0, IsVolatile1; const Value *SrcValue0, *SrcValue1; int SrcValueOffset0, SrcValueOffset1; unsigned SrcValueAlign0, SrcValueAlign1; const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, SrcValue1, SrcValueOffset1, + Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, SrcValueAlign1, SrcTBAAInfo1); } /// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a load. +/// node. 
Returns true if the operand was a nonvolatile load. bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlign, @@ -10232,11 +10908,12 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, Ptr = LS->getBasePtr(); Size = LS->getMemoryVT().getSizeInBits() >> 3; + IsVolatile = LS->isVolatile(); SrcValue = LS->getSrcValue(); SrcValueOffset = LS->getSrcValueOffset(); SrcValueAlign = LS->getOriginalAlignment(); TBAAInfo = LS->getTBAAInfo(); - return isa<LoadSDNode>(LS); + return isa<LoadSDNode>(LS) && !IsVolatile; } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, @@ -10249,12 +10926,13 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for node. SDValue Ptr; int64_t Size; + bool IsVolatile; const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo); + bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, + SrcValueOffset, SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); @@ -10295,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // Get alias information for Chain. SDValue OpPtr; int64_t OpSize; + bool OpIsVolatile; const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset, + OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, - SrcTBAAInfo, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo, + OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b4ac948f..a6f7461 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -638,29 +638,25 @@ bool FastISel::SelectCall(const User *I) { (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), - false); + false); - if (Op) + if (Op) { if (Op->isReg()) { - // Set the indirect flag if the type and the DIVariable's - // indirect field are in disagreement: Indirectly-addressed - // variables that are nonpointer types should be marked as - // indirect, and VLAs should be marked as indirect eventhough - // they are a pointer type. 
- bool IsIndirect = DI->getAddress()->getType()->isPointerTy() - ^ DIVar.isIndirect(); Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, DI->getVariable()); + TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, + DI->getVariable()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::DBG_VALUE)).addOperand(*Op).addImm(0) - .addMetadata(DI->getVariable()); - else + TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()); + } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } return true; } case Intrinsic::dbg_value: { @@ -688,6 +684,7 @@ bool FastISel::SelectCall(const User *I) { .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { + // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, Reg, DI->getOffset(), DI->getVariable()); @@ -1574,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); } +bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { + // Must be an add. + if (!isa<AddOperator>(Add)) + return false; + // Type size needs to match. + if (TD.getTypeSizeInBits(GEP->getType()) != + TD.getTypeSizeInBits(Add->getType())) + return false; + // Must be in the same basic block. + if (isa<Instruction>(Add) && + FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) + return false; + // Must have a constant operand. + return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e107276..3a8fb85 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + unsigned NumResults = CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. 
unsigned NumResults = CountResults(Node); @@ -722,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const uint16_t *ScratchRegs = NULL; + + // Handle PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::PATCHPOINT) { + unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -748,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + // Add scratch registers as implicit def and early clobber + if (ScratchRegs) + for (unsigned i = 0; ScratchRegs[i]; ++i) + MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | + RegState::EarlyClobber); + // Transfer all of the memory reference descriptions of this instruction. MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); @@ -784,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. 
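A note on the PATCHPOINT handling in the InstrEmitter.cpp hunks above: the target supplies its scratch registers as a zero-terminated array, and the emitter appends each one to the instruction as an implicit-def, early-clobber operand. The standalone C++ sketch below mirrors that loop outside of LLVM; the Operand struct, the appendScratchRegs helper, and the register numbers are invented for illustration and are not LLVM API.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for a machine-instruction operand.
    struct Operand {
      uint16_t Reg;
      bool ImplicitDef;
      bool EarlyClobber;
    };

    // Append every scratch register as an implicit-def, early-clobber
    // operand. The list is zero-terminated, matching the
    // "for (unsigned i = 0; ScratchRegs[i]; ++i)" loop in the diff above.
    void appendScratchRegs(std::vector<Operand> &MI,
                           const uint16_t *ScratchRegs) {
      if (!ScratchRegs)
        return; // the target declared no scratch registers
      for (unsigned i = 0; ScratchRegs[i]; ++i)
        MI.push_back({ScratchRegs[i], /*ImplicitDef=*/true,
                      /*EarlyClobber=*/true});
    }

    int main() {
      static const uint16_t Scratch[] = {11, 12, 0}; // hypothetical regs
      std::vector<Operand> MI;
      appendScratchRegs(MI, Scratch);
      std::printf("appended %zu scratch operands\n", MI.size()); // prints 2
    }

Terminating the array with register 0 lets targets publish scratch lists of any length through a single pointer, without threading an explicit count through the emitter.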
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bd844e5..9061ae9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -95,8 +95,8 @@ private: SDValue N1, SDValue N2, ArrayRef<int> Mask) const; - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - SDLoc dl); + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, @@ -311,6 +311,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); + unsigned AS = ST->getAddressSpace(); + SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { @@ -343,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -381,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); + MinAlign(ST->getAlignment(), Offset), + ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -408,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, TLI.getPointerTy(AS))); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + Alignment, ST->getTBAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -438,10 +442,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (LoadedVT != VT) Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : @@ -474,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. 
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -492,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + MinAlign(LD->getAlignment(), Offset), + LD->getTBAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -536,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment); + LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, TLI.getPointerTy())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), + LD->getTBAAInfo()); } // aggregate the two parts @@ -655,6 +661,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -663,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -672,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -685,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(4)); + DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U), + TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -708,6 +716,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -745,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -767,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -788,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -809,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); + RoundVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); } // The order of the stores doesn't matter. @@ -854,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -902,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote loads to same size type"); - SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); RChain = Res.getValue(1); break; @@ -924,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -950,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); Ch = Result.getValue(1); // The chain. @@ -987,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1016,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); + isNonTemporal, Alignment, TBAAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1079,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { case TargetLowering::Expand: if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->getMemOperand()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1109,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + Chain, Ptr, SrcVT, + LD->getMemOperand()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1386,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) @@ -1428,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); - - if (Idx.getValueType().bitsGT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); - else - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); @@ -1531,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, DAG.getIntPtrConstant(ByteOffset)); + LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. 
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1580,10 +1581,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - if (Align > StackAlign) - SP = DAG.getNode(ISD::AND, dl, VT, SP, - DAG.getConstant(-(uint64_t)Align, VT)); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), @@ -1595,22 +1596,44 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, } /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. This routine expands SETCC with -/// illegal condition code into AND / OR of multiple SETCC values. -void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, +/// condition code CC on the current target. +/// +/// If the SETCC has been legalized using AND / OR, then the legalized node +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// +/// If the SETCC has been legalized by using getSetCCSwappedOperands(), +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a +/// true/false result. +/// +/// \returns true if the SetCC has been legalized, false if it hasn't. +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; - ISD::CondCode InvCC = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); @@ -1650,18 +1673,21 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: + // We only support using the inverted operation, which is computed above + // and not a different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: - InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) { - // We only support using the inverted operation and not a - // different manner of supporting expanding these cases. - llvm_unreachable("Don't know how to expand this condition!"); + // Try inverting the result of the inverse condition. 
+ InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; } - LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC); - RHS = SDValue(); - CC = SDValue(); - return; + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); } SDValue SetCC1, SetCC2; @@ -1678,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); - break; + return true; } } + return false; } /// EmitStackConvert - Emit a store/load combination to the stack. This stores @@ -1969,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -1987,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; @@ -2002,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2049,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, bool isSigned = Opcode == ISD::SDIVREM; RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2106,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, /// isSinCosLibcallAvailable - Return true if sincos libcall is available. 
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2156,7 +2183,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = RTLIB::SINCOS_F32; break; case MVT::f64: LC = RTLIB::SINCOS_F64; break; @@ -2232,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy()); + SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType()); // set up Hi and Lo (into buffer) address based on endian SDValue Hi = StackSlot; - SDValue Lo = DAG.getNode(ISD::ADD, dl, - TLI.getPointerTy(), StackSlot, WordOff); + SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), + StackSlot, WordOff); if (TLI.isLittleEndian()) std::swap(Hi, Lo); @@ -2382,7 +2409,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. uint64_t FF; - switch (Op0.getValueType().getSimpleVT().SimpleTy) { + switch (Op0.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) @@ -2395,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset); + CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) @@ -2656,6 +2683,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -2665,6 +2693,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -2674,6 +2703,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -2683,6 +2713,7 @@ std::pair <SDValue, SDValue> 
SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -2692,6 +2723,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -2701,6 +2733,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -2710,6 +2743,7 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -2719,6 +2753,47 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; + } + break; + case ISD::ATOMIC_LOAD_MAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMAX: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; + } + break; + case ISD::ATOMIC_LOAD_MIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; + } + break; + case ISD::ATOMIC_LOAD_UMIN: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; } break; } @@ -2730,6 +2805,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Results; 
SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: @@ -2947,20 +3023,20 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Align > TLI.getMinStackArgumentAlignment()) { assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(Align - 1, - TLI.getPointerTy())); + VAList.getValueType())); - VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, DAG.getConstant(-(int64_t)Align, - TLI.getPointerTy())); + VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, DAG.getConstant(TLI.getDataLayout()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), - TLI.getPointerTy())); + VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, MachinePointerInfo(V), false, false, 0); @@ -3231,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, @@ -3565,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, PTy, - Index, DAG.getConstant(EntrySize, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), + Index, DAG.getConstant(EntrySize, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), + Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, @@ -3611,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); Tmp3 = Node->getOperand(2); - LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, + Tmp3, NeedInvert, dl); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. + if (Tmp3.getNode()) + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); - // If we expanded the SETCC into an AND/OR, return the new node - if (Tmp2.getNode() == 0) { Results.push_back(Tmp1); break; } @@ -3645,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp4 = Node->getOperand(3); // False SDValue CC = Node->getOperand(4); - LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, CC, dl); + bool Legalized = false; + // Try to legalize by inverting the condition. 
This is for targets that + // might support an ordered version of a condition, but not the unordered + // version (or vice versa). + ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Tmp1.getValueType().isInteger()); + if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + // Use the new condition code and swap true and false + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); + } else { + // If The inverse is not legal, then try to swap the arguments using + // the inverse condition code. + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); + if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + // The swapped inverse condition is legal, so swap true and false, + // lhs and rhs. + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); + } + } + + if (!Legalized) { + Legalized = LegalizeSetCCCondCode( + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); + + assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); - assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); - Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); - CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. + if (CC.getNode()) { + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3, Tmp4, CC); + } else { + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + } + } Results.push_back(Tmp1); break; } @@ -3662,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), - Tmp2, Tmp3, Tmp4, dl); - - assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); - Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); - Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + (void)Legalized; + assert(Legalized && "Can't legalize BR_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC + // node. 
+ if (Tmp4.getNode()) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, + Tmp4, Tmp2, Tmp3, Node->getOperand(4)); + } else { + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + } Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index cea0b02..ecf4c5d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; @@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -172,7 +173,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -311,7 +312,7 @@ SDValue 
DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, SDLoc(N)); + NVT, Ops, 3, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - SDLoc(N)); + SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, 
RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -490,7 +503,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, SDLoc(N)); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -504,7 +517,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + &Op, 1, false, dl).first; } @@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } @@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; @@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, GetPairElements(Call, Lo, Hi); 
} +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), @@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, - SDLoc(N)); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1134,8 +1160,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->getMemOperand()); // Remember the chain. Chain = Hi.getValue(1); @@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; @@ -1325,6 +1351,17 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { N->getOperand(4)), 0); } +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). 
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); @@ -1353,7 +1390,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1386,7 +1423,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, - false, dl); + false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1445,7 +1482,5 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { GetExpandedOp(ST->getValue(), Lo, Hi); return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + ST->getMemoryVT(), ST->getMemOperand()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index ff8f1f9..4255948 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -417,9 +417,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); SDLoc dl(N); SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), - N->getPointerInfo(), - N->getMemoryVT(), N->isVolatile(), - N->isNonTemporal(), N->getAlignment()); + N->getMemoryVT(), N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -919,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -1037,17 +1036,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); SDLoc dl(N); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), - N->getMemoryVT(), - isVolatile, isNonTemporal, Alignment); + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1193,6 +1188,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -1202,6 +1198,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -1211,6 +1208,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -1220,6 +1218,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -1229,6 +1228,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -1238,6 +1238,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -1247,6 +1248,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -1256,6 +1258,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; } break; } @@ -1770,7 +1773,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, 
LC, VT, &Op, 1, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1781,7 +1785,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1803,6 +1808,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1811,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1833,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1842,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1864,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. 
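Another change threaded through the memory hunks here and below: alias (TBAA) metadata is now captured from the original memory node and re-attached to every load or store the legalizer synthesizes, so alias information survives splitting and widening. Condensed from the ExpandIntRes_LOAD hunk just above (same APIs; surrounding context assumed):

const MDNode *TBAAInfo = N->getTBAAInfo();   // capture the node's TBAA tag
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
                 isVolatile, isNonTemporal, isInvariant, Alignment,
                 TBAAInfo);                  // low half keeps the tag...
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                  DAG.getConstant(IncrementSize, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
                    N->getPointerInfo().getWithOffset(IncrementSize),
                    NEVT, isVolatile, isNonTemporal,
                    MinAlign(Alignment, IncrementSize),
                    TBAAInfo);               // ...and so does the high half

The same hunks also replace DAG.getIntPtrConstant(IncrementSize) with DAG.getConstant(IncrementSize, Ptr.getValueType()), so the offset is built in the pointer's own width rather than the target's default pointer type — which matters once pointers of more than one size are in play.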
@@ -1997,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -2060,7 +2068,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2155,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, + Hi); return; } @@ -2238,7 +2247,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2378,7 +2387,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2398,7 +2407,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2685,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2702,6 +2711,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2711,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); } if (TLI.isLittleEndian()) { @@ -2719,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, 
Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2728,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2760,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment); + HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2835,7 +2845,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); Alignment = std::min(Alignment, 4u); // Load the value out, extending it from f32 to the destination float type. @@ -2852,7 +2863,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index fd770d1..eb13230 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -958,20 +958,6 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { return SDValue(N->getOperand(ResNo)); } -/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type -/// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { - // Currently all types are split in half. 
- if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - } else { - unsigned NumElements = InVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - } -} - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void DAGTypeLegalizer::GetPairElements(SDValue Pair, @@ -988,10 +974,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - if (Index.getValueType().bitsGT(TLI.getPointerTy())) - Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); - else - Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. @@ -1024,20 +1007,23 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl); + &Ops[0], NumOps, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 63e9af3..13bb08f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -410,6 +410,7 @@ private: SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -470,6 +471,7 @@ private: void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -480,6 +482,7 @@ private: // Float Operand Expansion. 
bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); @@ -534,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); - SDValue ScalarizeVecOp_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -558,6 +561,7 @@ private: void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -628,6 +632,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -699,10 +704,6 @@ private: GetExpandedFloat(Op, Lo, Hi); } - /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type - /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); @@ -730,6 +731,12 @@ private: GetExpandedFloat(Op, Lo, Hi); } + + /// This function will split the integer \p Op into \p NumElements + /// operations of type \p EltVT and store them in \p Ops. + void IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, EVT EltVT); + // Generic Result Expansion. void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 96f6143..c749fde 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -77,13 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - InVT.getVectorNumElements()/2); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -169,7 +165,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. 
unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, + StackPtr.getValueType())); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -253,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -307,6 +306,25 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SDLoc dl(N); if (N->getValueType(0).isVector()) { @@ -315,21 +333,27 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. + unsigned NumElts = 2; EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), - 2); - - if (isTypeLegal(NVT)) { - SDValue Parts[2]; - GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. 
+ NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } - if (TLI.isBigEndian()) - std::swap(Parts[0], Parts[1]); + SmallVector<SDValue, 8> Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); - } + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } // Otherwise, store to a temporary and load out again as the new type. @@ -439,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); + const MDNode *TBAAInfo = St->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -450,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -489,14 +513,12 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - assert(Cond.getValueType().getVectorElementType() == MVT::i1 && - "Condition legalized before result?"); - unsigned NumElements = Cond.getValueType().getVectorNumElements(); - EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); - CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getConstant(0, TLI.getVectorIdxTy())); - CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getConstant(NumElements / 2, TLI.getVectorIdxTy())); + // Check if there are already split versions of the vector available and + // use those instead of splitting the mask operand again.
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Cond, CL, CH); + else + llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -518,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index bbe11b8..2c3cdcc 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: Changed = true; - return LegalizeOp(TLI.LowerOperation(Result, DAG)); + return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); @@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: @@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUND: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->isInvariant(), LD->getAlignment(), + LD->getTBAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment(), + LD->getTBAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(LoadBytes)); + DAG.getConstant(LoadBytes, BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->getAlignment(), LD->getTBAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -556,10 +561,10 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // This scalar TruncStore may be illegal, but we 
legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Stores.push_back(Store); } @@ -597,10 +602,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); // Generate a mask operand. - EVT MaskTy = TLI.getSetCCResultType(*DAG.getContext(), VT); - assert(MaskTy.isVector() && "Invalid CC type"); - assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() - && "Invalid mask size"); + EVT MaskTy = VT.changeVectorElementTypeToInteger(); // What is the size of each element in the vector mask. EVT BitTy = MaskTy.getScalarType(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54380ec..f7a3e3d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: @@ -215,7 +217,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment(), + N->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -369,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: - Res = ScalarizeVecOp_EXTEND(N); + case ISD::TRUNCATE: + Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -408,7 +412,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { /// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs /// to be scalarized, it must be <1 x ty>. Extend the element instead. 
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); @@ -455,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment()); + N->getAlignment(), N->getTBAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->getOriginalAlignment(), N->getTBAAInfo()); } @@ -517,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -540,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::SDIV: @@ -615,7 +624,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value.
EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); @@ -670,7 +679,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -691,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -707,7 +716,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -731,7 +740,8 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, DAG.getValueType(LoVT)); @@ -783,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, StackPtr.getValueType())); // Load the Hi part from the stack slot. 
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -794,7 +804,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -804,7 +814,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); - GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -815,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment); + isInvariant, Alignment, TBAAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -847,24 +859,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc DL(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. - EVT InVT = N->getOperand(0).getValueType(); SDValue LL, LH, RL, RH; - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getConstant(0, TLI.getVectorIdxTy())); - LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); - - RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getConstant(0, TLI.getVectorIdxTy())); - RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -875,22 +875,15 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Get the dest types - they may not match the input types, e.g. int_to_fp. 
EVT LoVT, HiVT; SDLoc dl(N); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. EVT InVT = N->getOperand(0).getValueType(); - if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); - } else { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getConstant(0, TLI.getVectorIdxTy())); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getConstant(InNVT.getVectorNumElements(), - TLI.getVectorIdxTy())); - } + else + llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -913,6 +906,58 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT( + Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + NumElements); + EVT SplitSrcVT = + EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT SplitLoVT, SplitHiVT; + llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. 
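SplitVecRes_ExtendOp above is the interesting new piece: when an extend more than doubles the element width (e.g. v16i8 -> v16i32, where the one-step v16i16 is legal but the half-size source v8i8 is not), extending one power-of-two step first keeps both halves in legal vector types instead of decaying all the way to scalarization. The core of the trick, condensed from the hunk (llvm::tie, GetSplitDestVTs and SplitVector are the helpers this tree uses):

// Extend one step only (e.g. v16i8 -> v16i16), while that type is legal...
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = EVT::getVectorVT(
    Ctx,
    EVT::getIntegerVT(Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
    NumElements);
SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
// ...then split the one-step result and finish each half independently.
llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);   // finish extending low half
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);   // finish extending high half

As the hunk's own comment notes, this need not complete legalization in one shot; it just moves each half a step in the right direction rather than falling through to scalar code.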
@@ -1105,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { SDValue Mask = N->getOperand(0); SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); + EVT Src0VT = Src0.getValueType(); SDLoc DL(N); - EVT MaskVT = Mask.getValueType(); - assert(MaskVT.isVector() && "VSELECT without a vector mask?"); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); assert(Lo.getValueType() == Hi.getValueType() && "Lo and Hi have differing types"); - unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); - unsigned HiNumElts = Hi.getValueType().getVectorNumElements(); - assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); - - LLVMContext &Ctx = *DAG.getContext(); - SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy()); - SDValue LoElts = DAG.getConstant(LoNumElts, TLI.getVectorIdxTy()); - EVT Src0VT = Src0.getValueType(); - EVT Src0EltTy = Src0VT.getVectorElementType(); - EVT MaskEltTy = MaskVT.getVectorElementType(); - - EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts); - EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts); - EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts); - EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts); + EVT LoOpVT, HiOpVT; + llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); - SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero); - SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero); - - SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts); - SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts); - - SDValue LoMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero); - SDValue HiMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts); + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1249,33 +1276,34 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment); + LoMemVT, isVol, isNT, Alignment, TBAAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment); + HiMemVT, isVol, isNT, Alignment, TBAAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1341,13 +1369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { SDLoc DL(N); // Extract the halves of the input via extract_subvector. - EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getConstant(0, TLI.getVectorIdxTy())); - SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getConstant(NumElements/2, - TLI.getVectorIdxTy())); + SDValue InLoVec, InHiVec; + llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1446,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::ADD: case ISD::AND: case ISD::BSWAP: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + case ISD::FADD: case ISD::FCOPYSIGN: - case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FREM: case ISD::FSUB: - case ISD::MUL: - case ISD::MULHS: - case ISD::MULHU: - case ISD::OR: + case ISD::FDIV: + case ISD::FREM: case ISD::SDIV: - case ISD::SREM: case ISD::UDIV: + case ISD::SREM: case ISD::UREM: - case ISD::SUB: - case ISD::XOR: - Res = WidenVecRes_Binary(N); + Res = WidenVecRes_BinaryCanTrap(N); break; case ISD::FPOWI: @@ -1507,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -1534,6 +1562,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. 
unsigned Opcode = N->getOpcode(); SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2532,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2541,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align); + isVolatile, isNonTemporal, isInvariant, Align, + TBAAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2577,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -2586,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -2602,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); } @@ -2682,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2693,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align); + LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, + DAG.getConstant(Offset, + BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2724,6 +2768,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool 
isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2750,12 +2795,12 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -2770,11 +2815,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -2792,6 +2837,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2814,17 +2860,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align)); + isVolatile, isNonTemporal, Align, + TBAAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, DAG.getConstant(Offset, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); } } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index d684164..1dd2128 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { // Constants used to denote relative importance of // heuristic components for cost computation. 
static const unsigned PriorityOne = 200; -static const unsigned PriorityTwo = 100; -static const unsigned PriorityThree = 50; -static const unsigned PriorityFour = 15; -static const unsigned PriorityFive = 5; +static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 15; +static const unsigned PriorityFour = 5; static const unsigned ScaleOne = 20; static const unsigned ScaleTwo = 10; static const unsigned ScaleThree = 5; @@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { if (N->isMachineOpcode()) { const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); if (TID.isCall()) - ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); } else switch (N->getOpcode()) { @@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: - ResCount += PriorityFive; + ResCount += PriorityFour; break; case ISD::INLINEASM: - ResCount += PriorityFour; + ResCount += PriorityThree; break; } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f5fe168..1a562d7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); - // Reserve resources for the scheduled intruction. + // Reserve resources for the scheduled instruction. EmitNode(SU); Sequence.push_back(SU); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 982dcc9..054e3dd 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -690,15 +690,6 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { } #endif // NDEBUG -namespace { - struct OrderSorter { - bool operator()(const std::pair<unsigned, MachineInstr*> &A, - const std::pair<unsigned, MachineInstr*> &B) { - return A.first < B.first; - } - }; -} - /// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, @@ -744,7 +735,10 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || + // Fast-isel may have inserted some instructions, in which case the + // BB->back().isPHI() test will not fire when we want it to. + prior(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; @@ -857,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Sort the source order instructions and use the order to insert debug // values. 
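The hunk just below swaps the hand-written OrderSorter functor for llvm::less_first, which orders pairs by their first member only. A minimal standalone sketch of that comparator's behavior, using plain std::pair stand-ins rather than the real (unsigned, MachineInstr*) pairs:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Stand-in for llvm::less_first: compare nothing but the .first member.
struct LessFirst {
  template <typename T>
  bool operator()(const T &A, const T &B) const { return A.first < B.first; }
};

int main() {
  std::vector<std::pair<unsigned, const char *>> Orders = {
      {3, "dbg.c"}, {1, "dbg.a"}, {2, "dbg.b"}};
  std::sort(Orders.begin(), Orders.end(), LessFirst());
  for (const auto &P : Orders)
    std::printf("%u %s\n", P.first, P.second); // 1 dbg.a, 2 dbg.b, 3 dbg.c
}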
- std::sort(Orders.begin(), Orders.end(), OrderSorter()); + std::sort(Orders.begin(), Orders.end(), less_first()); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bc6063c..45d5a4f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -869,16 +869,19 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), UpdateListeners(0) { + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(0) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, + const TargetLowering *tli) { MF = &mf; TTI = tti; + TLI = tli; Context = &mf.getFunction()->getContext(); } @@ -983,6 +986,54 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + APInt NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. + assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector<SDValue, 2> EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .trunc(ViaEltSizeInBits), + ViaEltVT, isT)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (TLI->isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. 
+ + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, + getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, + &Ops[0], Ops.size())); + return Result; + } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -1077,9 +1128,10 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); + const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TM.getTargetLowering()->getPointerTy().getSizeInBits(); + unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1298,11 +1350,8 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *Mask) { - assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && - "Vector Shuffle VTs must be a vectors"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() - && "Vector Shuffle VTs must have same element type"); + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) @@ -1351,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } - // If Identity shuffle, or all shuffle in to undef, return that node. - bool AllUndef = true; + // If Identity shuffle return that node. 
bool Identity = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; - if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity && NElts == N1.getValueType().getVectorNumElements()) + if (Identity && NElts) return N1; - if (AllUndef) - return getUNDEF(VT); FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1380,7 +1425,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), N1, N2, MaskAlloc); + new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), N1, N2, + MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1403,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), Ops, 5, - Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), + Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1447,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), + dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1510,6 +1559,26 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { return SDValue(N, 0); } +/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. +SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), + dl.getDebugLoc(), + VT, Ptr, SrcAS, DestAS); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. @@ -1561,7 +1630,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETFALSE: case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return getConstant(1, VT); + case ISD::SETTRUE2: { + const TargetLowering *TLI = TM.getTargetLowering(); + TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + return getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } case ISD::SETOEQ: case ISD::SETOGT: @@ -1643,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } else { // Ensure that the constant occurs on the RHS. 
- return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + MVT CompVT = N1.getValueType().getSimpleVT(); + if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + return SDValue(); + + return getSetCC(dl, VT, N2, N1, SwappedCond); } } @@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); @@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. @@ -2150,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = + VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2411,7 +2489,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2672,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); } AllNodes.push_back(N); @@ -3073,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + assert(VT.getVectorElementType() == + N1.getValueType().getVectorElementType() && "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); if (isa<ConstantSDNode>(Index.getNode())) { @@ -3086,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Trivial extraction. 
- if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; } break; @@ -3244,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); } AllNodes.push_back(N); @@ -3316,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, "Insert subvector VTs must be a vectors"); assert(VT == N1.getValueType() && "Dest and insert subvector source types must match!"); - assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); if (isa<ConstantSDNode>(Index.getNode())) { assert((N2.getValueType().getVectorNumElements() + @@ -3326,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } // Trivial insertion. - if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N2.getSimpleValueType()) return N2; } break; @@ -3349,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3771,7 +3857,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Value; Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, dl, DAG), @@ -3787,7 +3873,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, dl, DAG), @@ -3800,6 +3886,24 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, &OutChains[0], OutChains.size()); } +/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// operations. +/// +/// \param DAG Selection DAG where lowered code is placed. +/// \param dl Link to corresponding IR location. +/// \param Chain Control flow dependency. +/// \param Dst Pointer to destination memory location. +/// \param Src Value of byte to write into the memory. +/// \param Size Number of bytes to write. +/// \param Align Alignment of the destination in bytes. +/// \param isVol True if destination is volatile. +/// \param DstPtrInfo IR information on the memory pointer. 
+/// \returns New head in the control flow, if lowering was successful, empty +/// SDValue otherwise. +/// +/// The function tries to replace 'llvm.memset' intrinsic with several store +/// operations and value calculation code. This is usually profitable for small +/// memory size. static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4078,6 +4182,37 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue* Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + // Allocate the operands array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the allocator is released. + // If the number of operands is less than 5 we use AtomicSDNode's internal + // storage. + SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0; + + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops, DynOps, NumOps, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -4117,22 +4252,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 4); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain, - Ptr, Cmp, Swp, MMO, Ordering, - SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4190,22 +4311,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain, - Ptr, Val, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4248,21 +4355,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, Chain, - Ptr, MMO, Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. @@ -4339,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4458,7 +4554,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ExtType, + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -4478,6 +4575,14 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, TBAAInfo, Ranges); } +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, @@ -4490,6 +4595,14 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, } +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + 
SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, MMO); +} + SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { @@ -4548,8 +4661,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED, - false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4616,8 +4730,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, ISD::UNINDEXED, - true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4640,7 +4755,8 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); @@ -4715,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4781,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), 
VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4851,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) { } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(2); - Array[0] = VT1; - Array[1] = VT2; - SDVTList Result = makeVTList(Array, 2); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(3); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - SDVTList Result = makeVTList(Array, 3); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3 && I->VTs[3] == VT4) - return *I; - - EVT *Array = Allocator.Allocate<EVT>(4); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - Array[3] = VT4; - SDVTList Result = makeVTList(Array, 4); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) 
SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { - switch (NumVTs) { - case 0: llvm_unreachable("Cannot have nodes without results!"); - case 1: return getVTList(VTs[0]); - case 2: return getVTList(VTs[0], VTs[1]); - case 3: return getVTList(VTs[0], VTs[1], VTs[2]); - case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); - default: break; + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); } - for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) { - if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) - continue; - - if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) - return *I; + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs, VTs + NumVTs, Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); } - - EVT *Array = Allocator.Allocate<EVT>(NumVTs); - std::copy(VTs, VTs+NumVTs, Array); - SDVTList Result = makeVTList(Array, NumVTs); - VTList.push_back(Result); - return Result; + return Result->getSDVTList(); } @@ -5410,7 +5543,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) @@ -5916,6 +6050,12 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, TheGlobal = GA; } +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, + SDValue X, unsigned SrcAS, + unsigned DestAS) + : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), + SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { @@ -6162,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[0].getValueType(), - Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { @@ -6235,7 +6375,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI->getPointerTy().getSizeInBits(); + unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, TLI->getDataLayout()); @@ -6268,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. 
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) { + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + } else { + unsigned NumElements = VT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NumElements/2); + } + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. +std::pair<SDValue, SDValue> +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, TLI->getVectorIdxTy())); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + return std::make_pair(Lo, Hi); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6389,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N, void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG - assert(N && "Checking nonexistant SDNode"); + assert(N && "Checking nonexistent SDNode"); SmallPtrSet<const SDNode*, 32> visited; SmallPtrSet<const SDNode*, 32> checked; checkForCyclesHelper(N, visited, checked); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b9f4381..2b2713d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -49,7 +50,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -58,6 +58,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> using namespace llvm; @@ -1063,8 +1064,10 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); - if (isa<ConstantPointerNull>(C)) - return DAG.getConstant(0, TLI->getPointerTy()); + if (isa<ConstantPointerNull>(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, TLI->getPointerTy(AS)); + } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, VT); @@ -1268,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true, 0, 0)); + VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1617,8 
+1620,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } else Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); } else { - assert(CB.CC == ISD::SETCC_INVALID && - "Condition is undefined for to-the-range belonging check."); + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); @@ -1626,9 +1628,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); - if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) { + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), - ISD::SETULE); + ISD::SETLE); } else { SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT)); @@ -1741,6 +1743,77 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.setRoot(BrCond); } +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PtrTy = TLI->getPointerTy(); + + MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI->getStackProtectorIndex(); + + const Value *IRGuard = SPD.getGuard(); + SDValue GuardPtr = getValue(IRGuard); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + + unsigned Align = + TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); + + SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + StackSlotPtr, + MachinePointerInfo::getFixedStack(FI), + true, false, false, Align); + + // Perform the comparison via a subtract/getsetcc. + EVT VT = Guard.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), + ISD::SETNE); + + // If the sub is not 0, then we know the guard/stackslot do not equal, so + // branch to failure MBB. + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + MVT::Other, StackSlot.getOperand(0), + Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); + // Otherwise branch to success MBB. + SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, BrCond, + DAG.getBasicBlock(SPD.getSuccessMBB())); + + DAG.setRoot(Br); +} + +/// Codegen the failure basic block for a stack protector check. +/// +/// A failure stack protector machine basic block consists simply of a call to +/// __stack_chk_fail(). +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. 
+void +SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { + const TargetLowering *TLI = TM.getTargetLowering(); + SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, + MVT::isVoid, 0, 0, false, getCurSDLoc(), + false, false).second; + DAG.setRoot(Chain); +} + /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, @@ -2073,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2107,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2174,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); const APInt &High = cast<ConstantInt>(I->High)->getValue(); - if (Low.ule(TEI) && TEI.ule(High)) { + if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2348,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2378,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI->isOperationLegal(ISD::SHL, TLI->getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; @@ -2421,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. - if (maxValue.ult(IntPtrBits)) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -2496,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - - /// Use a shorter form of declaration, and also - /// show the we want to use CRSBuilder as Clusterifier. - typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier; - - Clusterifier TheClusterifier; + size_t numCmps = 0; BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases @@ -2510,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, - BPI ? 
BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); - } - - TheClusterifier.optimize(); - - size_t numCmps = 0; - for (Clusterifier::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - Clusterifier::Cluster &C = *i; - // Update edge weight for the cluster. - unsigned W = C.first.Weight; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(Case(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second, W)); + uint32_t ExtraWeight = + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; + + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), + SMBB, ExtraWeight)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { + const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); + const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + I->ExtraWeight += J->ExtraWeight; + J = Cases.erase(J); + } else { + I = J++; + } + } - if (C.first.getLow() != C.first.getHigh()) - // A range counts double, since it requires two compares. - ++numCmps; + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; } return numCmps; @@ -2859,6 +2940,21 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { setValue(&I, N); // noop cast. } +void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Value *SV = I.getOperand(0); + SDValue N = getValue(SV); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + + unsigned SrcAS = SV->getType()->getPointerAddressSpace(); + unsigned DestAS = I.getType()->getPointerAddressSpace(); + + if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); + + setValue(&I, N); +} + void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); @@ -3151,10 +3247,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { - SDValue N = getValue(I.getOperand(0)); + Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. 
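The Clusterify merge loop above collapses sorted cases that are numerically consecutive and branch to the same block into a single range, summing their edge weights. A standalone sketch of that merge, with plain integers standing in for the ConstantInt values and MachineBasicBlock pointers:

#include <cstdio>
#include <vector>

struct Case { long Low, High; int BB; unsigned Weight; };

int main() {
  // Already sorted by value, as the std::sort with CaseCmp above guarantees.
  std::vector<Case> Cases = {{1, 1, 7, 10}, {2, 2, 7, 5}, {5, 5, 9, 1}};
  if (Cases.size() >= 2)
    for (auto I = Cases.begin(), J = I + 1; J != Cases.end();) {
      if (J->Low - I->High == 1 && I->BB == J->BB) {
        I->High = J->High;      // grow the current cluster
        I->Weight += J->Weight; // fold in the merged edge weight
        J = Cases.erase(J);     // erase returns the next valid iterator
      } else {
        I = J++;
      }
    }
  for (const Case &C : Cases) // [1,2] -> BB#7 (w 15), [5,5] -> BB#9 (w 1)
    std::printf("[%ld,%ld] -> BB#%d (w %u)\n", C.Low, C.High, C.BB, C.Weight);
}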
- Type *Ty = I.getOperand(0)->getType()->getScalarType(); + Type *Ty = Op0->getType()->getScalarType(); + unsigned AS = Ty->getPointerAddressSpace(); + SDValue N = getValue(Op0); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3179,14 +3277,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI->getPointerTy(); + EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), - TLI->getPointerTy(), + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, DAG.getConstant(Offs, MVT::i64)); else - OffsVal = DAG.getIntPtrConstant(Offs); + OffsVal = DAG.getConstant(Offs, PTy); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); @@ -3194,7 +3291,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); @@ -3451,7 +3548,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + getValue(I.getCompareOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), @@ -3499,7 +3596,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue L = DAG.getAtomic(NT, dl, - getValue(I.getValOperand()).getValueType().getSimpleVT(), + getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), @@ -4193,7 +4290,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; - if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { APFloat Ten(10.0f); @@ -4705,14 +4802,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, TLI->getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, - TLI->getPointerTy(), + CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - TLI->getPointerTy()), + CfaArg.getValueType()), CfaArg); SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), DAG.getConstant(0, TLI->getPointerTy())); - setValue(&I, DAG.getNode(ISD::ADD, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return 0; } @@ -4902,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::nearbyint: { + case Intrinsic::nearbyint: + case Intrinsic::round: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
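The visitGetElementPtr hunks above fold each constant index into Base + getTypeAllocSize(Ty) * Index, computed at the pointer width of the operand's address space. A standalone sketch of that arithmetic, assuming 32-bit pointers and an illustrative element size:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Base = 0x1000;  // current value of N, the address built so far
  uint64_t AllocSize = 12; // getTypeAllocSize(Ty) of the indexed element
  int64_t Index = 3;       // the GEP index operand
  // The 64-bit offset is truncated to the pointer width before the add,
  // mirroring the TRUNCATE-to-PTy path in the hunk above.
  uint32_t Offs =
      static_cast<uint32_t>(AllocSize * static_cast<uint64_t>(Index));
  std::printf("0x%x\n", Base + Offs); // 0x1000 + 36 = 0x1024
}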
@@ -4915,6 +5013,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } setValue(&I, DAG.getNode(Opcode, sdl, @@ -4922,6 +5021,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -5207,9 +5312,30 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::invariant_end: // Discard region information. return 0; + case Intrinsic::stackprotectorcheck: { + // Do not actually emit anything for this basic block. Instead we initialize + // the stack protector descriptor and export the guard variable so we can + // access it in FinishBasicBlock. + const BasicBlock *BB = I.getParent(); + SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); + ExportFromCurrentBlock(SPDescriptor.getGuard()); + + // Flush our exports since we are going to process a terminator. + (void)getControlRoot(); + return 0; + } case Intrinsic::donothing: // ignore return 0; + case Intrinsic::experimental_stackmap: { + visitStackmap(I); + return 0; + } + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: { + visitPatchpoint(I); + return 0; + } } } @@ -5274,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); - Entry.Alignment = CS.getParamAlignment(attrInd); + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); } @@ -5364,8 +5483,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } if (!Result.second.getNode()) { - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. HasTailCall = true; // Since there's no actual continuation from this block, nothing can be @@ -5445,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. 
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be @@ -5460,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2)); + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + setValue(&I, DAG.getConstant(0, CallVT)); + return true; + } + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; - switch (Size->getZExtValue()) { + switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = 0; @@ -5476,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { break; case 2: LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); + LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); + LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ @@ -5503,7 +5652,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); - if (ActuallyDoIt && Size->getZExtValue() > 4) { + if (ActuallyDoIt && CSize->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) @@ -5516,8 +5665,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI->getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurSDLoc(), CallVT)); + processIntegerCallValue(I, Res, false); return true; } } @@ -5526,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. 
If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an +/// optimized form. If so, return true and lower it, otherwise return false +/// and it will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + // Verify that the prototype makes sense. char *strcpy(char *, char *) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int strcmp(void*,void*) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrLenCall -- See if we can lower a strlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + // Verify that the prototype makes sense. 
size_t strlen(char *) + if (I.getNumArgOperands() != 1) + return false; + + const Value *Arg0 = I.getArgOperand(0); + if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strnlen(char *, size_t) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. @@ -5644,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: @@ -5666,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; + case LibFunc::strcpy: + if (visitStrCpyCall(I, false)) + return; + break; + case LibFunc::stpcpy: + if (visitStrCpyCall(I, true)) + return; + break; + case LibFunc::strcmp: + if (visitStrCmpCall(I)) + return; + break; + case LibFunc::strlen: + if (visitStrLenCall(I)) + return; + break; + case LibFunc::strnlen: + if (visitStrNLenCall(I)) + return; + break; } } } @@ -6421,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } +/// \brief Lower an argument list according to the target calling convention. +/// +/// \return A tuple of <return-value, token-chain> +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +std::pair<SDValue, SDValue> +SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + bool useVoidTy) { + TargetLowering::ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. 
+ ImmutableCallSite CS(&CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + const Value *V = CI.getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(V); + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, + /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, + CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, + /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + + const TargetLowering *TLI = TM.getTargetLowering(); + return TLI->LowerCallTo(CLI); +} + +/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { + // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, + // [live variables...]) + + assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); + + SDValue Callee = getValue(CI.getCalledValue()); + + // Lower into a call sequence with no args and no return value. + std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + SDNode *CallEnd = Chain.getNode(); + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the stackmap intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numShadowBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Push live variables for the stack map. + for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a STACKMAP node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), + NodeTys, Ops); + + // StackMap generates no value, so nothing goes in the NodeMap. + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. + DAG.ReplaceAllUsesWith(Call, MN); + + DAG.DeleteNode(Call); +} + +/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
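
For reference, a hedged summary of the operand order that visitStackmap above gives the STACKMAP machine node; the enum itself is hypothetical and not part of this patch:

// The lowered call's chain is re-pushed after the live variables, with the
// optional glue operand last, so the STACKMAP node can stand in for the
// whole lowered call sequence.
enum StackMapOperandPositions {
  SM_IDPos = 0,          // i32 target-constant <id>
  SM_NumShadowBytesPos,  // i32 target-constant <numShadowBytes>
  SM_FirstLivePos        // [live variables...], then chain, then glue
};
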
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + + CallingConv::ID CC = CI.getCallingConv(); + bool isAnyRegCC = CC == CallingConv::AnyReg; + bool hasDef = !CI.getType()->isVoidTy(); + SDValue Callee = getValue(CI.getOperand(2)); // <target> + + // Get the real number of arguments participating in the call <numArgs> + unsigned NumArgs = + cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + assert(CI.getNumArgOperands() >= NumArgs + 4 && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + std::pair<SDValue, SDValue> Result = + LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + SDNode *CallEnd = Chain.getNode(); + if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numNopBytes> constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); + } + // Assume that the Callee is a constant address. + Ops.push_back( + DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), + /*isTarget=*/true)); + + // Adjust <numArgs> to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); + NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (isAnyRegCC) + for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the arguments from the call instruction. + SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) + Ops.push_back(*i); + + // Push live variables for the stack map. + for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else + Ops.push_back(OpVal); + } + + // Push the register mask info. 
+ if (hasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (isAnyRegCC && hasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 3> ValueVTs; + ComputeValueVTs(TLI, CI.getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Update the NodeMap. + if (hasDef) { + if (isAnyRegCC) + setValue(&CI, SDValue(MN, 0)); + else + setValue(&CI, Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (isAnyRegCC && hasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. 
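
A hedged recap of the fixed operand layout that visitPatchpoint above hard-codes before the real call arguments and trailing live values; the struct and its names are hypothetical:

struct PatchPointMetaArgs {
  static const unsigned IDPos     = 0;  // i32 <id>
  static const unsigned NBytesPos = 1;  // i32 <numBytes> of nop padding
  static const unsigned TargetPos = 2;  // i8* <target>, assumed constant above
  static const unsigned NArgsPos  = 3;  // i32 <numArgs> lowered as call args
  // Operands 4 .. 4+<numArgs>-1 are call arguments; the rest are live values.
};
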
/// FIXME: When all targets are @@ -6438,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6527,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) @@ -6666,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); Ins.push_back(RetArg); } @@ -6677,6 +7240,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); + unsigned PartBase = 0; for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -6714,8 +7278,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, - Idx-1, i*RegisterVT.getStoreSize()); + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6723,6 +7287,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } + PartBase += VT.getStoreSize(); } } @@ -6940,3 +7505,22 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { ConstantsOut.clear(); } + +/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB +/// is 0. +MachineBasicBlock * +SelectionDAGBuilder::StackProtectorDescriptor:: +AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB) { + // If SuccBB has not been created yet, create it. + if (!SuccMBB) { + MachineFunction *MF = ParentMBB->getParent(); + MachineFunction::iterator BBI = ParentMBB; + SuccMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++BBI, SuccMBB); + } + // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(SuccMBB); + return SuccMBB; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ef73c00..835f643 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- c++ -*---===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -26,6 +26,7 @@ namespace llvm { +class AddrSpaceCastInst; class AliasAnalysis; class AllocaInst; class BasicBlock; @@ -84,7 +85,7 @@ class SelectionDAGBuilder { const Instruction *CurInst; DenseMap<const Value*, SDValue> NodeMap; - + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; @@ -182,6 +183,17 @@ private: typedef std::vector<CaseRec> CaseRecVector; + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator()(const Case &C1, const Case &C2) { + assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); + const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -224,7 +236,7 @@ private: struct JumpTable { JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - + /// Reg - the virtual register containing the index of the jump table entry //. to jump to. unsigned Reg; @@ -278,6 +290,201 @@ private: BitTestInfo Cases; }; + /// A class which encapsulates all of the information needed to generate a + /// stack protector check and signals to isel via its state being initialized + /// that a stack protector needs to be generated. + /// + /// *NOTE* The following is a high level documentation of SelectionDAG Stack + /// Protector Generation. The reason that it is placed here is for a lack of + /// other good places to stick it. + /// + /// High Level Overview of SelectionDAG Stack Protector Generation: + /// + /// Previously, generation of stack protectors was done exclusively in the + /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated + /// splitting basic blocks at the IR level to create the success/failure basic + /// blocks in the tail of the basic block in question. As a result of this, + /// calls that would have qualified for the sibling call optimization were no + /// longer eligible for optimization since said calls were no longer right in + /// the "tail position" (i.e. the immediate predecessor of a ReturnInst + /// instruction). + /// + /// Then it was noticed that since the sibling call optimization causes the + /// callee to reuse the caller's stack, if we could delay the generation of + /// the stack protector check until later in CodeGen after the sibling call + /// decision was made, we get both the tail call optimization and the stack + /// protector check! + /// + /// A few goals in solving this problem were: + /// + /// 1. Preserve the architecture independence of stack protector generation. + /// + /// 2. 
Preserve the normal IR level stack protector check for platforms like + /// OpenBSD for which we support platform specific stack protector + /// generation. + /// + /// The main problem that guided the present solution is that one can not + /// solve this problem in an architecture independent manner at the IR level + /// only. This is because: + /// + /// 1. The decision on whether or not to perform a sibling call on certain + /// platforms (for instance i386) requires lower level information + /// related to available registers that can not be known at the IR level. + /// + /// 2. Even if the previous point were not true, the decision on whether to + /// perform a tail call is done in LowerCallTo in SelectionDAG which + /// occurs after the Stack Protector Pass. As a result, one would need to + /// put the relevant callinst into the stack protector check success + /// basic block (where the return inst is placed) and then move it back + /// later at SelectionDAG/MI time before the stack protector check if the + /// tail call optimization failed. The MI level option was nixed + /// immediately since it would require platform specific pattern + /// matching. The SelectionDAG level option was nixed because + /// SelectionDAG only processes one IR level basic block at a time + /// implying one could not create a DAG Combine to move the callinst. + /// + /// To get around this problem a few things were realized: + /// + /// 1. While one can not handle multiple IR level basic blocks at the + /// SelectionDAG Level, one can generate multiple machine basic blocks + /// for one IR level basic block. This is how we handle bit tests and + /// switches. + /// + /// 2. At the MI level, tail calls are represented via a special return + /// MIInst called "tcreturn". Thus if we know the basic block in which we + /// wish to insert the stack protector check, we get the correct behavior + /// by always inserting the stack protector check right before the return + /// statement. This is a "magical transformation" since no matter where + /// the stack protector check intrinsic is, we always insert the stack + /// protector check code at the end of the BB. + /// + /// Given the aforementioned constraints, the following solution was devised: + /// + /// 1. On platforms that do not support SelectionDAG stack protector check + /// generation, allow for the normal IR level stack protector check + /// generation to continue. + /// + /// 2. On platforms that do support SelectionDAG stack protector check + /// generation: + /// + /// a. Use the IR level stack protector pass to decide if a stack + /// protector is required/which BB we insert the stack protector check + /// in by reusing the logic already therein. If we wish to generate a + /// stack protector check in a basic block, we place a special IR + /// intrinsic called llvm.stackprotectorcheck right before the BB's + /// returninst or if there is a callinst that could potentially be + /// sibling call optimized, before the call inst. + /// + /// b. Then when a BB with said intrinsic is processed, we codegen the BB + /// normally via SelectBasicBlock. In said process, when we visit the + /// stack protector check, we do not actually emit anything into the + /// BB. Instead, we just initialize the stack protector descriptor + /// class (which involves stashing information/creating the success + /// mbbb and the failure mbb if we have not created one for this + /// function yet) and export the guard variable that we are going to + /// compare. + /// + /// c. 
After we finish selecting the basic block, in FinishBasicBlock if + /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is + /// initialized, we first find a splice point in the parent basic block + /// before the terminator and then splice the terminator of said basic + /// block into the success basic block. Then we code-gen a new tail for + /// the parent basic block consisting of the two loads, the comparison, + /// and finally two branches to the success/failure basic blocks. We + /// conclude by code-gening the failure basic block if we have not + /// code-gened it already (all stack protector checks we generate in + /// the same function, use the same failure basic block). + class StackProtectorDescriptor { + public: + StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), + Guard(0) { } + ~StackProtectorDescriptor() { } + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB && Guard; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, + MachineBasicBlock *MBB, + const CallInst &StackProtCheckCall) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + SuccessMBB = AddSuccessorMBB(BB, MBB); + FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB); + if (!Guard) + Guard = StackProtCheckCall.getArgOperand(0); + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = 0; + SuccessMBB = 0; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2.The guard variable since the guard variable we are checking against is + /// always the same. + void resetPerFunctionState() { + FailureMBB = 0; + Guard = 0; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + const Value *getGuard() { return Guard; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). 
+ MachineBasicBlock *FailureMBB; + + /// The guard variable which we will compare against the stored value in the + /// stack protector stack slot. + const Value *Guard; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB = 0); + }; + private: const TargetMachine &TM; public: @@ -295,6 +502,9 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. @@ -305,9 +515,9 @@ public: FunctionLoweringInfo &FuncInfo; /// OptLevel - What optimization level we're generating code for. - /// + /// CodeGenOpt::Level OptLevel; - + /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -389,7 +599,7 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - + void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(N.getNode() == 0 && "Already set a value for this node!"); @@ -410,6 +620,12 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool useVoidTy = false); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); @@ -451,6 +667,9 @@ private: public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, @@ -461,7 +680,7 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - + private: // These all get lowered before this pass.
void visitInvoke(const InvokeInst &I); @@ -502,6 +721,7 @@ private: void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); void visitExtractElement(const User &I); void visitInsertElement(const User &I); @@ -523,6 +743,11 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -535,6 +760,8 @@ private: void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -543,10 +770,13 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); /// EmitFuncArgumentDbgValue - If V is an function argument then create - /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, int64_t Offset, const SDValue &N); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d8ee221..c04a08d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -142,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; @@ -223,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -484,6 +486,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast<AddrSpaceCastSDNode>(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; } if (unsigned Order = getIROrder()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 01da51c..3a0cfa1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -223,6 +223,44 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { 
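
The OptLevelChanger class introduced just below relies on the scoped save/restore (RAII) idiom: capture the old state in the constructor and restore it in the destructor, so every exit path from runOnMachineFunction is covered. A generic sketch of the idiom (hypothetical type; llvm::SaveAndRestore in Support is the closest library analogue):

template <typename T>
class ScopedRestore {
  T &Ref;
  T Saved;
public:
  ScopedRestore(T &R, T NewVal) : Ref(R), Saved(R) { Ref = NewVal; }
  ~ScopedRestore() { Ref = Saved; }  // runs on return, break, or exception
};
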
//===--------------------------------------------------------------------===// + /// \brief This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. + class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) + IS.TM.setFastISel(true); + DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel + << " ; After: -O" << NewOptLevel << "\n"); + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel + << " ; After: -O" << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, @@ -230,7 +268,7 @@ namespace llvm { const TargetLowering *TLI = IS->getTargetLowering(); const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); - if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || + if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -356,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetLowering *TLI = TM.getTargetLowering(); MF = &mf; RegInfo = &MF->getRegInfo(); @@ -369,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ST.resetSubtargetFeatures(MF); TM.resetTargetOptions(MF); + // Reset OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI); + CurDAG->init(*MF, TTI, TLI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -408,9 +453,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); - MachineBasicBlock::iterator InsertPos = Def; - // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + if (Def) { + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. 
+ Def->getParent()->insert(llvm::next(InsertPos), MI); + } else + DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -422,7 +471,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - bool IsIndirect = MI->getOperand(1).isImm(); + bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), @@ -497,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (J == E) break; To = J->second; } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. MRI.replaceRegWith(From, To); } @@ -617,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + CurDAG->NewNodesMustHaveLegalTypes = true; + if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); @@ -1140,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { delete FastIS; SDB->clearDanglingDebugInfo(); + SDB->SPDescriptor.resetPerFunctionState(); +} + +/// Given that the input MI is before a partial terminator sequence TSeq, return +/// true if MI + TSeq is also a partial terminator sequence. +/// +/// A Terminator sequence is a sequence of MachineInstrs which at this point in +/// lowering copy vregs into physical registers, which are then passed into +/// terminator instructions so we can satisfy ABI constraints. A partial +/// terminator sequence is an improper subset of a terminator sequence (i.e. it +/// may be the whole terminator sequence). +static bool MIIsInTerminatorSequence(const MachineInstr *MI) { + // If we do not have a copy or an implicit def, we return true if and only if + // MI is a debug value. + if (!MI->isCopy() && !MI->isImplicitDef()) + // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the + // physical registers if there is debug info associated with the terminator + // of our mbb. We want to include said debug info in our terminator + // sequence, so we return true in that case. + return MI->isDebugValue(); + + // We have left the terminator sequence if we are not doing one of the + // following: + // + // 1. Copying a vreg into a physical register. + // 2. Copying a vreg into a vreg. + // 3. Defining a register via an implicit def. + + // OPI should always be a register definition... + MachineInstr::const_mop_iterator OPI = MI->operands_begin(); + if (!OPI->isReg() || !OPI->isDef()) + return false; + + // Defining any register via an implicit def is always ok. + if (MI->isImplicitDef()) + return true; + + // Grab the copy source... + MachineInstr::const_mop_iterator OPI2 = OPI; + ++OPI2; + assert(OPI2 != MI->operands_end() + && "Should have a copy implying we should have 2 arguments."); + + // Make sure that the copy dest is not a vreg when the copy source is a + // physical register.
+ if (!OPI2->isReg() || + (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && + TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + return false; + + return true; +} + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +static MachineBasicBlock::iterator +FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + // + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + while (MIIsInTerminatorSequence(Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; } void @@ -1152,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty(); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty()) { + if (MustUpdatePHINodes) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && @@ -1165,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() { continue; PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } - return; } + // Handle stack protector. + if (SDB->SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. + MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. 
+ FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (!FailureMBB->size()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + // If we updated PHI Nodes, return early. + if (MustUpdatePHINodes) + return; + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1741,15 +1928,15 @@ WalkChainUsers(const SDNode *ChainedNode, SDNode *User = *UI; + if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. + continue; + // If we see an already-selected machine node, then we've gone beyond the // pattern that we're selecting down into the already selected chunk of the // DAG. - if (User->isMachineOpcode() || - User->getOpcode() == ISD::HANDLENODE) // Root of the graph. - continue; - unsigned UserOpcode = User->getOpcode(); - if (UserOpcode == ISD::CopyToReg || + if (User->isMachineOpcode() || + UserOpcode == ISD::CopyToReg || UserOpcode == ISD::CopyFromReg || UserOpcode == ISD::INLINEASM || UserOpcode == ISD::EH_LABEL || @@ -1886,7 +2073,6 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, } } - SDValue Res; if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), @@ -1962,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N == RecordedNodes[RecNo].first; } +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
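
The stack protector splice above ultimately reduces to one runtime comparison. A standalone scalar analogue follows; the real code emits machine IR, and the guard typically coming from __stack_chk_guard or a TLS slot is an assumption here:

extern "C" void __stack_chk_fail();  // libc failure hook; does not return

// ParentMBB's new tail loads both values and branches: SuccessMBB receives
// the spliced original terminator, and FailureMBB (shared across the
// function) makes this call.
inline void stackProtectorCheck(unsigned long SlotValue,
                                unsigned long GuardValue) {
  if (SlotValue != GuardValue)
    __stack_chk_fail();
}
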
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, @@ -2076,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckSame: Result = !::CheckSame(Table, Index, N, RecordedNodes); return Index; + case SelectionDAGISel::OPC_CheckChild0Same: + case SelectionDAGISel::OPC_CheckChild1Same: + case SelectionDAGISel::OPC_CheckChild2Same: + case SelectionDAGISel::OPC_CheckChild3Same: + Result = !::CheckChildSame(Table, Index, N, RecordedNodes, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); + return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: Result = !::CheckPatternPredicate(Table, Index, SDISel); return Index; @@ -2373,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; + + case OPC_CheckChild0Same: case OPC_CheckChild1Same: + case OPC_CheckChild2Same: case OPC_CheckChild3Same: + if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, + Opcode-OPC_CheckChild0Same)) + break; + continue; + case OPC_CheckPatternPredicate: if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; continue; @@ -2432,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_SwitchType: { - MVT CurNodeVT = N.getValueType().getSimpleVT(); + MVT CurNodeVT = N.getSimpleValueType(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2544,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { @@ -2569,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2606,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. 
for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2633,7 +2846,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; if (InputChain.getNode() == 0) @@ -2650,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; @@ -2827,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; @@ -2844,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); SDValue Res = RecordedNodes[ResSlot].first; assert(i < NodeToMatch->getNumValues() && diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e3c6306..82b068d 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return isUsedByReturnOnly(Node, Chain); } +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. +void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. 
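
The hunk below changes makeLibCall to return a {result, chain} pair rather than the bare result. An illustrative caller-side fragment of the new contract (context such as LC, RetVT, Ops, NumOps, and dl is assumed, mirroring the softenSetCCOperands callers further down that take .first):

std::pair<SDValue, SDValue> CallInfo =
    makeLibCall(DAG, LC, RetVT, Ops, NumOps, /*isSigned=*/false, dl);
SDValue Result   = CallInfo.first;   // the libcall's return value
SDValue OutChain = CallInfo.second;  // useful for chained or no-return calls
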
-SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, - RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, SDLoc dl) const { +std::pair<SDValue, SDValue> +TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, SDLoc dl, + bool doesNotReturn, + bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI); - - return CallInfo.first; + doesNotReturn, isReturnValueUsed, Callee, Args, + DAG, dl); + return LowerCallTo(CLI); } @@ -183,14 +197,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + dl).first; NewRHS = DAG.getConstant(0, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + dl).first; NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); @@ -632,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), NarrowShl)); } + // Repeat the SHL optimization above in cases where an extension + // intervenes: (shl (anyext (shr x, c1)), c2) to + // (shl (anyext x), c2-c1). This requires that the bottom c1 bits + // aren't demanded (as above) and that the shifted upper c1 bits of + // x aren't demanded. + if (InOp.hasOneUse() && + InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse() && + isa<ConstantSDNode>(InnerOp.getOperand(1))) { + uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) + ->getZExtValue(); + if (InnerShAmt < ShAmt && + InnerShAmt < InnerBits && + NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && + NewMask.trunc(ShAmt) == 0) { + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, + Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, + NewExt, NewSA)); + } + } } KnownZero <<= SA->getZExtValue(); @@ -722,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), Op.getOperand(1))); - } else if (KnownOne.intersects(SignBit)) { // New bits are known one. 
- KnownOne |= HighBits; + + int Log2 = NewMask.exactLogBase2(); + if (Log2 >= 0) { + // The bit must come from the sign. + SDValue NewSA = + TLO.DAG.getConstant(BitWidth - 1 - Log2, + Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), NewSA)); } + + if (KnownOne.intersects(SignBit)) + // New bits are known one. + KnownOne |= HighBits; } break; case ISD::SIGN_EXTEND_INREG: { @@ -1077,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE: case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return DAG.getConstant(1, VT); + case ISD::SETTRUE2: { + TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + return DAG.getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } } // Ensure that the constant occurs on the RHS, and fold constant // comparisons. - if (isa<ConstantSDNode>(N0.getNode())) - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); + if (isa<ConstantSDNode>(N0.getNode()) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1178,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. if (DCI.isBeforeLegalize() && + !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && N0.getNode()->hasOneUse() && isa<LoadSDNode>(N0.getOperand(0)) && @@ -1322,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || @@ -1759,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { if (ValueHasExactlyOneBitSet(N1, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } } } if (N1.getOpcode() == ISD::AND) if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { if (ValueHasExactlyOneBitSet(N0, DAG)) { Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N1, Zero, Cond); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N1.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } } } } @@ -1993,7 +2061,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, 
MVT VT) const { - if (Constraint[0] != '{') + if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2142,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( break; } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { - OpInfo.ConstraintVT = MVT::getIntegerVT( - 8*getDataLayout()->getPointerSize(PT->getAddressSpace())); + unsigned PtrSize + = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace()); + OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp deleted file mode 100644 index 6c826de..0000000 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ /dev/null @@ -1,1152 +0,0 @@ -//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a shrink wrapping variant of prolog/epilog insertion: -// - Spills and restores of callee-saved registers (CSRs) are placed in the -// machine CFG to tightly surround their uses so that execution paths that -// do not use CSRs do not pay the spill/restore penalty. -// -// - Avoiding placement of spills/restores in loops: if a CSR is used inside a -// loop the spills are placed in the loop preheader, and restores are -// placed in the loop exit nodes (the successors of loop _exiting_ nodes). -// -// - Covering paths without CSR uses: -// If a region in a CFG uses CSRs and has multiple entry and/or exit points, -// the use info for the CSRs inside the region is propagated outward in the -// CFG to ensure validity of the spill/restore placements. This decreases -// the effectiveness of shrink wrapping but does not require edge splitting -// in the machine CFG. -// -// This shrink wrapping implementation uses an iterative analysis to determine -// which basic blocks require spills and restores for CSRs. -// -// This pass uses MachineDominators and MachineLoopInfo. Loop information -// is used to prevent placement of callee-saved register spills/restores -// in the bodies of loops. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "shrink-wrap" - -#include "PrologEpilogInserter.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include <sstream> - -using namespace llvm; - -STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); - -// Shrink Wrapping: -static cl::opt<bool> -ShrinkWrapping("shrink-wrap", - cl::desc("Shrink wrap callee-saved register spills/restores")); - -// Shrink wrap only the specified function, a debugging aid.
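// For reference, the deleted pass was driven entirely by hidden flags; an
// illustrative invocation, using the option names defined in this file and
// immediately below, would have been:
//   llc -shrink-wrap -shrink-wrap-func=foo -shrink-wrap-dbg=Iterations in.ll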
-static cl::opt<std::string> -ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, - cl::desc("Shrink wrap the specified function"), - cl::value_desc("funcname"), - cl::init("")); - -// Debugging level for shrink wrapping. -enum ShrinkWrapDebugLevel { - Disabled, BasicInfo, Iterations, Details -}; - -static cl::opt<enum ShrinkWrapDebugLevel> -ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, - cl::desc("Print shrink wrapping debugging information"), - cl::values( - clEnumVal(Disabled , "disable debug output"), - clEnumVal(BasicInfo , "print basic DF sets"), - clEnumVal(Iterations, "print SR sets for each iteration"), - clEnumVal(Details , "print all DF sets"), - clEnumValEnd)); - - -void PEI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - if (ShrinkWrapping || ShrinkWrapFunc != "") { - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - } - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addRequired<TargetPassConfig>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -//===----------------------------------------------------------------------===// -// ShrinkWrapping implementation -//===----------------------------------------------------------------------===// - -// Conveniences for dealing with machine loops. -MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { - assert(LP && "Machine loop is NULL."); - MachineBasicBlock* PHDR = LP->getLoopPreheader(); - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - PHDR = PLP->getLoopPreheader(); - PLP = PLP->getParentLoop(); - } - return PHDR; -} - -MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { - if (LP == 0) - return 0; - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - LP = PLP; - PLP = PLP->getParentLoop(); - } - return LP; -} - -bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - -// Initialize shrink wrapping DFA sets, called before iterations. -void PEI::clearAnticAvailSets() { - AnticIn.clear(); - AnticOut.clear(); - AvailIn.clear(); - AvailOut.clear(); -} - -// Clear all sets constructed by shrink wrapping. -void PEI::clearAllSets() { - ReturnBlocks.clear(); - clearAnticAvailSets(); - UsedCSRegs.clear(); - CSRUsed.clear(); - TLLoops.clear(); - CSRSave.clear(); - CSRRestore.clear(); -} - -// Initialize all shrink wrapping data. -void PEI::initShrinkWrappingInfo() { - clearAllSets(); - EntryBlock = 0; -#ifndef NDEBUG - HasFastExitPath = false; -#endif - ShrinkWrapThisFunction = ShrinkWrapping; - // DEBUG: enable or disable shrink wrapping for the current function - // via --shrink-wrap-func=<funcname>. -#ifndef NDEBUG - if (ShrinkWrapFunc != "") { - std::string MFName = MF->getName().str(); - ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); - } -#endif -} - - -/// placeCSRSpillsAndRestores - determine which MBBs of the function -/// need save, restore code for callee-saved registers by doing a DF analysis -/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs -/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo -/// is used to ensure that CSR save/restore code is not placed inside loops. -/// This function computes the maps of MBBs -> CSRs to spill and restore -/// in CSRSave, CSRRestore. -/// -/// If shrink wrapping is not being performed, place all spills in -/// the entry block, all restores in return blocks.
In this case, -/// CSRSave has a single mapping, CSRRestore has mappings for each -/// return block. -/// -void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { - - DEBUG(MF = &Fn); - - initShrinkWrappingInfo(); - - DEBUG(if (ShrinkWrapThisFunction) { - dbgs() << "Place CSR spills/restores for " - << MF->getName() << "\n"; - }); - - if (calculateSets(Fn)) - placeSpillsAndRestores(Fn); -} - -/// calcAnticInOut - calculate the anticipated in/out reg sets -/// for the given MBB by looking forward in the MCFG at MBB's -/// successors. -/// -bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) - SmallVector<MachineBasicBlock*, 4> successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - - unsigned i = 0, e = successors.size(); - if (i != e) { - CSRegSet prevAnticOut = AnticOut[MBB]; - MachineBasicBlock* SUCC = successors[i]; - - AnticOut[MBB] = AnticIn[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - AnticOut[MBB] &= AnticIn[SUCC]; - } - if (prevAnticOut != AnticOut[MBB]) - changed = true; - } - - // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); - CSRegSet prevAnticIn = AnticIn[MBB]; - AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; - if (prevAnticIn != AnticIn[MBB]) - changed = true; - return changed; -} - -/// calcAvailInOut - calculate the available in/out reg sets -/// for the given MBB by looking backward in the MCFG at MBB's -/// predecessors. -/// -bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { - bool changed = false; - - // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) - SmallVector<MachineBasicBlock*, 4> predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - - unsigned i = 0, e = predecessors.size(); - if (i != e) { - CSRegSet prevAvailIn = AvailIn[MBB]; - MachineBasicBlock* PRED = predecessors[i]; - - AvailIn[MBB] = AvailOut[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - AvailIn[MBB] &= AvailOut[PRED]; - } - if (prevAvailIn != AvailIn[MBB]) - changed = true; - } - - // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); - CSRegSet prevAvailOut = AvailOut[MBB]; - AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; - if (prevAvailOut != AvailOut[MBB]) - changed = true; - return changed; -} - -/// calculateAnticAvail - build the sets anticipated and available -/// registers in the MCFG of the current function iteratively, -/// doing a combined forward and backward analysis. -/// -void PEI::calculateAnticAvail(MachineFunction &Fn) { - // Initialize data flow sets. - clearAnticAvailSets(); - - // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. - bool changed = true; - unsigned iterations = 0; - while (changed) { - changed = false; - ++iterations; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Calculate anticipated in, out regs at MBB from - // anticipated at successors of MBB. - changed |= calcAnticInOut(MBB); - - // Calculate available in, out regs at MBB from - // available at predecessors of MBB. 
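// Restating the two fixed-point systems iterated here, from the set
// operations in calcAnticInOut and calcAvailInOut above (anticipation runs
// backward, availability forward; both intersect over CFG neighbors):
//   AnticOut[B] = INTERSECT(AnticIn[S])  for S in successors(B)
//   AnticIn[B]  = CSRUsed[B] | AnticOut[B]
//   AvailIn[B]  = INTERSECT(AvailOut[P]) for P in predecessors(B)
//   AvailOut[B] = CSRUsed[B] | AvailIn[B]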
- changed |= calcAvailInOut(MBB); - } - } - - DEBUG({ - if (ShrinkWrapDebugging >= Details) { - dbgs() - << "-----------------------------------------------------------\n" - << " Antic/Avail Sets:\n" - << "-----------------------------------------------------------\n" - << "iterations = " << iterations << "\n" - << "-----------------------------------------------------------\n" - << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" - << "-----------------------------------------------------------\n"; - - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); - } - - dbgs() - << "-----------------------------------------------------------\n"; - } - }); -} - -/// propagateUsesAroundLoop - copy used register info from MBB to all blocks -/// of the loop given by LP and its parent loops. This prevents spills/restores -/// from being placed in the bodies of loops. -/// -void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { - if (! MBB || !LP) - return; - - std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); - for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { - MachineBasicBlock* LBB = loopBlocks[i]; - if (LBB == MBB) - continue; - if (CSRUsed[LBB].contains(CSRUsed[MBB])) - continue; - CSRUsed[LBB] |= CSRUsed[MBB]; - } -} - -/// calculateSets - collect the CSRs used in this function, compute -/// the DF sets that describe the initial minimal regions in the -/// Machine CFG around which CSR spills and restores must be placed. -/// -/// Additionally, this function decides if shrink wrapping should -/// be disabled for the current function, checking the following: -/// 1. the current function has more than 500 MBBs: heuristic limit -/// on function size to reduce compile time impact of the current -/// iterative algorithm. -/// 2. all CSRs are used in the entry block. -/// 3. all CSRs are used in all immediate successors of the entry block. -/// 4. all CSRs are used in a subset of blocks, each of which dominates -/// all return blocks. These blocks, taken as a subgraph of the MCFG, -/// are equivalent to the entry block since all execution paths pass -/// through them. -/// -bool PEI::calculateSets(MachineFunction &Fn) { - // Sets used to compute spill, restore placement sets. - const std::vector<CalleeSavedInfo> CSI = - Fn.getFrameInfo()->getCalleeSavedInfo(); - - // If no CSRs used, we are done. - if (CSI.empty()) { - DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getName() - << ": uses no callee-saved registers\n"); - return false; - } - - // Save refs to entry and return blocks. - EntryBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - ReturnBlocks.push_back(MBB); - - // Determine if this function has fast exit paths. - DEBUG(if (ShrinkWrapThisFunction) - findFastExitPath()); - - // Limit shrink wrapping via the current iterative bit vector - // implementation to functions with <= 500 MBBs. - if (Fn.size() > 500) { - DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getName() - << ": too large (" << Fn.size() << " MBBs)\n"); - ShrinkWrapThisFunction = false; - } - - // Return now if not shrink wrapping. - if (! ShrinkWrapThisFunction) - return false; - - // Collect set of used CSRs. 
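// A toy worked example of the analysis driven below (assumed diamond CFG,
// one CSR r4, used only in block A):
//        E               CSRUsed:  E = {}  A = {r4}  B = {}  R = {}
//       / \              Antic:    AnticIn[A] = {r4}, AnticIn[B] = {},
//      A   B                       so AnticOut[E] = {}
//       \ /              Avail:    AvailOut[A] = {r4}, AvailOut[B] = {},
//        R (return)                so AvailIn[R] = {}
// Feeding these into the placement formulas of calcSpillPlacements and
// calcRestorePlacements further down puts both the save and the restore
// of r4 inside A:
//   CSRSave[A]    = (AnticIn[A] - AvailIn[A]) & anticInPreds   = {r4}
//   CSRRestore[A] = (AvailOut[A] - AnticOut[A]) & availOutSucc = {r4}
// so the E->B->R path never spills or reloads r4.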
- for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - UsedCSRegs.set(inx); - } - - // Walk instructions in all MBBs, create CSRUsed[] sets, choose - // whether or not to shrink wrap this function. - MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); - MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - - bool allCSRUsesInEntryBlock = true; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - unsigned Reg = CSI[inx].getReg(); - // If instruction I reads or modifies Reg, add it to UsedCSRegs, - // CSRUsed map for the current block. - for (unsigned opInx = 0, opEnd = I->getNumOperands(); - opInx != opEnd; ++opInx) { - const MachineOperand &MO = I->getOperand(opInx); - if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) - continue; - unsigned MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MOReg == Reg || - (TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(Reg, MOReg))) { - // CSR Reg is defined/used in block MBB. - CSRUsed[MBB].set(inx); - // Check for uses in EntryBlock. - if (MBB != EntryBlock) - allCSRUsesInEntryBlock = false; - } - } - } - } - - if (CSRUsed[MBB].empty()) - continue; - - // Propagate CSRUsed[MBB] in loops - if (MachineLoop* LP = LI.getLoopFor(MBB)) { - // Add top level loop to work list. - MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); - MachineLoop* PLP = getTopLevelLoopParent(LP); - - if (! HDR) { - HDR = PLP->getHeader(); - assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); - MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); - HDR = *PI; - } - TLLoops[HDR] = PLP; - - // Push uses from inside loop to its parent loops, - // or to all other MBBs in its loop. - if (LP->getLoopDepth() > 1) { - for (MachineLoop* PLP = LP->getParentLoop(); PLP; - PLP = PLP->getParentLoop()) { - propagateUsesAroundLoop(MBB, PLP); - } - } else { - propagateUsesAroundLoop(MBB, LP); - } - } - } - - if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in EntryBlock\n"); - ShrinkWrapThisFunction = false; - } else { - bool allCSRsUsedInEntryFanout = true; - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (CSRUsed[SUCC] != UsedCSRegs) - allCSRsUsedInEntryFanout = false; - } - if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in imm successors of EntryBlock\n"); - ShrinkWrapThisFunction = false; - } - } - - if (ShrinkWrapThisFunction) { - // Check if MBB uses CSRs and dominates all exit nodes. - // Such nodes are equiv. to the entry node w.r.t. - // CSR uses: every path through the function must - // pass through this node. If each CSR is used at least - // once by these nodes, shrink wrapping is disabled. - CSRegSet CSRUsedInChokePoints; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) - continue; - bool dominatesExitNodes = true; - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - if (! 
DT.dominates(MBB, ReturnBlocks[ri])) { - dominatesExitNodes = false; - break; - } - if (dominatesExitNodes) { - CSRUsedInChokePoints |= CSRUsed[MBB]; - if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); - ShrinkWrapThisFunction = false; - break; - } - } - } - } - - // Return now if we have decided not to apply shrink wrapping - // to the current function. - if (! ShrinkWrapThisFunction) - return false; - - DEBUG({ - dbgs() << "ENABLED: " << Fn.getName(); - if (HasFastExitPath) - dbgs() << " (fast exit path)"; - dbgs() << "\n"; - if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; - if (ShrinkWrapDebugging >= Details) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dumpAllUsed(); - } - } - }); - - // Build initial DF sets to determine minimal regions in the - // Machine CFG around which CSRs must be spilled and restored. - calculateAnticAvail(Fn); - - return true; -} - -/// addUsesForMEMERegion - add uses of CSRs spilled or restored in -/// multi-entry, multi-exit (MEME) regions so spill and restore -/// placement will not break code that enters or leaves a -/// shrink-wrapped region by inducing spills with no matching -/// restores or restores with no matching spills. A MEME region -/// is a subgraph of the MCFG with multiple entry edges, multiple -/// exit edges, or both. This code propagates use information -/// through the MCFG until all paths requiring spills and restores -/// _outside_ the computed minimal placement regions have been covered. -/// -bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVectorImpl<MachineBasicBlock *> &blks) { - if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { - bool processThisBlock = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC->pred_size() > 1) { - processThisBlock = true; - break; - } - } - if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED->succ_size() > 1) { - processThisBlock = true; - break; - } - } - } - if (! processThisBlock) - return false; - } - - CSRegSet prop; - if (!CSRSave[MBB].empty()) - prop = CSRSave[MBB]; - else if (!CSRRestore[MBB].empty()) - prop = CSRRestore[MBB]; - else - prop = CSRUsed[MBB]; - if (prop.empty()) - return false; - - // Propagate selected bits to successors, predecessors of MBB. - bool addedUses = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - // Self-loop - if (SUCC == MBB) - continue; - if (! CSRUsed[SUCC].contains(prop)) { - CSRUsed[SUCC] |= prop; - addedUses = true; - blks.push_back(SUCC); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "successor " << getBasicBlockName(SUCC) << "\n"); - } - } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - // Self-loop - if (PRED == MBB) - continue; - if (! 
CSRUsed[PRED].contains(prop)) { - CSRUsed[PRED] |= prop; - addedUses = true; - blks.push_back(PRED); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "predecessor " << getBasicBlockName(PRED) << "\n"); - } - } - return addedUses; -} - -/// addUsesForTopLevelLoops - add uses for CSRs used inside top -/// level loops to the exit blocks of those loops. -/// -bool PEI::addUsesForTopLevelLoops(SmallVectorImpl<MachineBasicBlock *> &blks) { - bool addedUses = false; - - // Place restores for top level loops where needed. - for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator - I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { - MachineBasicBlock* MBB = I->first; - MachineLoop* LP = I->second; - MachineBasicBlock* HDR = LP->getHeader(); - SmallVector<MachineBasicBlock*, 4> exitBlocks; - CSRegSet loopSpills; - - loopSpills = CSRSave[MBB]; - if (CSRSave[MBB].empty()) { - loopSpills = CSRUsed[HDR]; - assert(!loopSpills.empty() && "No CSRs used in loop?"); - } else if (CSRRestore[MBB].contains(CSRSave[MBB])) - continue; - - LP->getExitBlocks(exitBlocks); - assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); - for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { - MachineBasicBlock* EXB = exitBlocks[i]; - if (! CSRUsed[EXB].contains(loopSpills)) { - CSRUsed[EXB] |= loopSpills; - addedUses = true; - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "LOOP " << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(loopSpills) << ")->" - << getBasicBlockName(EXB) << "\n"); - if (EXB->succ_size() > 1 || EXB->pred_size() > 1) - blks.push_back(EXB); - } - } - } - return addedUses; -} - -/// calcSpillPlacements - determine which CSRs should be spilled -/// in MBB using AnticIn sets of MBB's predecessors, keeping track -/// of changes to spilled reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVectorImpl<MachineBasicBlock *> &blks, - CSRegBlockMap &prevSpills) { - bool placedSpills = false; - // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) - CSRegSet anticInPreds; - SmallVector<MachineBasicBlock*, 4> predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - unsigned i = 0, e = predecessors.size(); - if (i != e) { - MachineBasicBlock* PRED = predecessors[i]; - anticInPreds = UsedCSRegs - AnticIn[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - anticInPreds &= (UsedCSRegs - AnticIn[PRED]); - } - } else { - // Handle uses in entry blocks (which have no predecessors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - anticInPreds = UsedCSRegs; - } - // Compute spills required at MBB: - CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; - - if (! CSRSave[MBB].empty()) { - if (MBB == EntryBlock) { - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; - } else { - // Reset all regs spilled in MBB that are also spilled in EntryBlock. 
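// Rationale for the subtraction below: saves placed in the entry block are
// paired, in the branch above, with restores in every return block, so
// leaving those CSRs in CSRSave[MBB] as well would plant a second,
// unmatched save on paths through MBB; subtracting CSRSave[EntryBlock]
// keeps each CSR's save/restore placement unique.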
- if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { - CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; - } - } - } - placedSpills = (CSRSave[MBB] != prevSpills[MBB]); - prevSpills[MBB] = CSRSave[MBB]; - // Remember this block for adding restores to successor - // blocks for multi-entry region. - if (placedSpills) - blks.push_back(MBB); - - DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) << "\n"); - - return placedSpills; -} - -/// calcRestorePlacements - determine which CSRs should be restored -/// in MBB using AvailOut sets of MBB's successors, keeping track -/// of changes to restored reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, - SmallVectorImpl<MachineBasicBlock *> &blks, - CSRegBlockMap &prevRestores) { - bool placedRestores = false; - // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) - CSRegSet availOutSucc; - SmallVector<MachineBasicBlock*, 4> successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - unsigned i = 0, e = successors.size(); - if (i != e) { - MachineBasicBlock* SUCC = successors[i]; - availOutSucc = UsedCSRegs - AvailOut[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); - } - } else { - if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { - // Handle uses in return blocks (which have no successors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - availOutSucc = UsedCSRegs; - } - } - // Compute restores required at MBB: - CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; - - // Postprocess restore placements at MBB. - // Remove the CSRs that are restored in the return blocks. - // Lest this be confusing, note that: - // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. - if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { - if (! CSRSave[EntryBlock].empty()) - CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; - } - placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); - prevRestores[MBB] = CSRRestore[MBB]; - // Remember this block for adding saves to predecessor - // blocks for multi-entry region. - if (placedRestores) - blks.push_back(MBB); - - DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - return placedRestores; -} - -/// placeSpillsAndRestores - place spills and restores of CSRs -/// used in MBBs in minimal regions that contain the uses. -/// -void PEI::placeSpillsAndRestores(MachineFunction &Fn) { - CSRegBlockMap prevCSRSave; - CSRegBlockMap prevCSRRestore; - SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; - bool changed = true; - unsigned iterations = 0; - - // Iterate computation of spill and restore placements in the MCFG until: - // 1. CSR use info has been fully propagated around the MCFG, and - // 2. computation of CSRSave[], CSRRestore[] reach fixed points.
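// Termination of the loop below rests on monotonicity: the addUses*
// helpers only ever grow the CSRUsed[] sets with |=, and CSRSave[] and
// CSRRestore[] are recomputed from scratch (note the clear() calls at the
// bottom of the loop) against those growing sets, so the number of
// changing iterations is bounded by the total count of (block, CSR) bits.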
- while (changed) { - changed = false; - ++iterations; - - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "iter " << iterations - << " --------------------------------------------------\n"); - - // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, - // which determines the placements of spills and restores. - // Keep track of changes to spills, restores in each iteration to - // minimize the total iterations. - bool SRChanged = false; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Place spills for CSRs in MBB. - SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); - - // Place restores for CSRs in MBB. - SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); - } - - // Add uses of CSRs used inside loops where needed. - changed |= addUsesForTopLevelLoops(cvBlocks); - - // Add uses for CSRs spilled or restored at branch, join points. - if (changed || SRChanged) { - while (! cvBlocks.empty()) { - MachineBasicBlock* MBB = cvBlocks.pop_back_val(); - changed |= addUsesForMEMERegion(MBB, ncvBlocks); - } - if (! ncvBlocks.empty()) { - cvBlocks = ncvBlocks; - ncvBlocks.clear(); - } - } - - if (changed) { - calculateAnticAvail(Fn); - CSRSave.clear(); - CSRRestore.clear(); - } - } - - // Check for effectiveness: - // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} - // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] - // Gives a measure of how many CSR spills have been moved from EntryBlock - // to minimal regions enclosing their uses. - CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); - unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); - numSRReduced += numSRReducedThisFunc; - DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "-----------------------------------------------------------\n"; - dbgs() << "total iterations = " << iterations << " ( " - << Fn.getName() - << " " << numSRReducedThisFunc - << " " << Fn.size() - << " )\n"; - dbgs() << "-----------------------------------------------------------\n"; - dumpSRSets(); - dbgs() << "-----------------------------------------------------------\n"; - if (numSRReducedThisFunc) - verifySpillRestorePlacement(); - }); -} - -// Debugging methods. -#ifndef NDEBUG -/// findFastExitPath - debugging method used to detect functions -/// with at least one path from the entry block to a return block -/// directly or which has a very small number of edges. -/// -void PEI::findFastExitPath() { - if (! EntryBlock) - return; - // Find a path from EntryBlock to any return block that does not branch: - // Entry - // | ... - // v | - // B1<-----+ - // | - // v - // Return - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - - // Assume positive, disprove existence of fast path. - HasFastExitPath = true; - - // Check the immediate successors. - if (isReturnBlock(SUCC)) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << getBasicBlockName(SUCC) << "\n"; - break; - } - // Traverse df from SUCC, look for a branch block. - std::string exitPath = getBasicBlockName(SUCC); - for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), - BE = df_end(SUCC); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - // Reject paths with branch nodes.
- if (SBB->succ_size() > 1) { - HasFastExitPath = false; - break; - } - exitPath += "->" + getBasicBlockName(SBB); - } - if (HasFastExitPath) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << exitPath << "\n"; - break; - } - } -} - -/// verifySpillRestorePlacement - check the current spill/restore -/// sets for safety. Attempt to find spills without restores or -/// restores without spills. -/// Spills: walk df from each MBB in spill set ensuring that -/// all CSRs spilled at MBB are restored on all paths -/// from MBB to all exit blocks. -/// Restores: walk idf from each MBB in restore set ensuring that -/// all CSRs restored at MBB are spilled on all paths -/// reaching MBB. -/// -void PEI::verifySpillRestorePlacement() { - unsigned numReturnBlocks = 0; - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (isReturnBlock(MBB) || MBB->succ_size() == 0) - ++numReturnBlocks; - } - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet spilled = BI->second; - CSRegSet restored; - - if (spilled.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - if (CSRRestore[MBB].intersects(spilled)) { - restored |= (CSRRestore[MBB] & spilled); - } - - // Walk depth first from MBB to find restores of all CSRs spilled at MBB: - // we must find restores for all spills w/no intervening spills on all - // paths from MBB to all return blocks. - for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), - BE = df_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - if (SBB == MBB) - continue; - // Stop when we encounter spills of any CSRs spilled at MBB that - // have not yet been seen to be restored. - if (CSRSave[SBB].intersects(spilled) && - !restored.contains(CSRSave[SBB] & spilled)) - break; - // Collect the CSRs spilled at MBB that are restored - // at this DF successor of MBB. - if (CSRRestore[SBB].intersects(spilled)) - restored |= (CSRRestore[SBB] & spilled); - // If we are at a return block, check that the restores - // we have seen so far exhaust the spills at MBB, then - // reset the restores. - if (isReturnBlock(SBB) || SBB->succ_size() == 0) { - if (restored != spilled) { - CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"); - } - restored.clear(); - } - } - } - - // Check restore placements.
- for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restored = BI->second; - CSRegSet spilled; - - if (restored.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"); - - if (CSRSave[MBB].intersects(restored)) { - spilled |= (CSRSave[MBB] & restored); - } - // Walk inverse depth first from MBB to find spills of all - // CSRs restored at MBB: - for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), - BE = idf_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* PBB = *BI; - if (PBB == MBB) - continue; - // Stop when we encounter restores of any CSRs restored at MBB that - // have not yet been seen to be spilled. - if (CSRRestore[PBB].intersects(restored) && - !spilled.contains(CSRRestore[PBB] & restored)) - break; - // Collect the CSRs restored at MBB that are spilled - // at this DF predecessor of MBB. - if (CSRSave[PBB].intersects(restored)) - spilled |= (CSRSave[PBB] & restored); - } - if (spilled != restored) { - CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"); - } - } -} - -// Debugging print methods. -std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { - if (!MBB) - return ""; - - if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getName().str(); - - std::ostringstream name; - name << "_MBB_" << MBB->getNumber(); - return name.str(); -} - -std::string PEI::stringifyCSRegSet(const CSRegSet& s) { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const std::vector<CalleeSavedInfo> CSI = - MF->getFrameInfo()->getCalleeSavedInfo(); - - std::ostringstream srep; - if (CSI.size() == 0) { - srep << "[]"; - return srep.str(); - } - srep << "["; - CSRegSet::iterator I = s.begin(), E = s.end(); - if (I != E) { - unsigned reg = CSI[*I].getReg(); - srep << TRI->getName(reg); - for (++I; I != E; ++I) { - reg = CSI[*I].getReg(); - srep << ","; - srep << TRI->getName(reg); - } - } - srep << "]"; - return srep.str(); -} - -void PEI::dumpSet(const CSRegSet& s) { - DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); -} - -void PEI::dumpUsed(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - }); -} - -void PEI::dumpAllUsed() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpUsed(MBB); - } -} - -void PEI::dumpSets(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - }); -} - -void PEI::dumpSets1(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - }); -} - 
-void PEI::dumpAllSets() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets1(MBB); - } -} - -void PEI::dumpSRSets() { - DEBUG({ - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (!CSRSave[MBB].empty()) { - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - dbgs() << '\n'; - } - - if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) - dbgs() << " " - << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - }); -} -#endif diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 2fc8f46..da2e710 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -42,41 +42,40 @@ STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPrepare : public FunctionPass { - const TargetMachine *TM; - Type *FunctionContextTy; - Constant *RegisterFn; - Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; - Constant *FrameAddrFn; - Constant *StackAddrFn; - Constant *StackRestoreFn; - Constant *LSDAAddrFn; - Value *PersonalityFn; - Constant *CallSiteFn; - Constant *FuncCtxFn; - AllocaInst *FuncCtx; - public: - static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM) { } - bool doInitialization(Module &M); - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} - const char *getPassName() const { - return "SJLJ Exception Handling preparation"; - } +class SjLjEHPrepare : public FunctionPass { + const TargetMachine *TM; + Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *CallSiteFn; + Constant *FuncCtxFn; + AllocaInst *FuncCtx; + +public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const {} + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } - private: - bool setupEntryBlockAndCallSites(Function &F); - void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, - Value *SelVal); - Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); - void lowerIncomingArguments(Function &F); - void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); - void insertCallSiteStore(Instruction *I, int Number); - }; +private: + bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal); + Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads); + void lowerIncomingArguments(Function &F); + void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes); + void insertCallSiteStore(Instruction *I, int Number); +}; } // end anonymous namespace char SjLjEHPrepare::ID = 0; @@ -92,23 +91,19 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = 
Type::getInt32Ty(M.getContext()); - FunctionContextTy = - StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf - NULL); - RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register", - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), - (Type *)0); - UnregisterFn = - M.getOrInsertFunction("_Unwind_SjLj_Unregister", - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), - (Type *)0); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + ArrayType::get(Int32Ty, 4), // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + ArrayType::get(VoidPtrTy, 5), // __jbuf + NULL); + RegisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)0); + UnregisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); @@ -134,16 +129,17 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); // Insert a store of the call-site number - ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), - Number); - Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/); + ConstantInt *CallSiteNoC = + ConstantInt::get(Type::getInt32Ty(I->getContext()), Number); + Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/); } /// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until /// we reach blocks we've already seen. static void MarkBlocksLiveIn(BasicBlock *BB, - SmallPtrSet<BasicBlock*, 64> &LiveBBs) { - if (!LiveBBs.insert(BB)) return; // already been here. + SmallPtrSet<BasicBlock *, 64> &LiveBBs) { + if (!LiveBBs.insert(BB)) + return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); @@ -153,12 +149,14 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// instruction with those returned by the personality function. void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); - if (!EVI) continue; - if (EVI->getNumIndices() != 1) continue; + if (!EVI) + continue; + if (EVI->getNumIndices() != 1) + continue; if (*EVI->idx_begin() == 0) EVI->replaceAllUsesWith(ExnVal); else if (*EVI->idx_begin() == 1) @@ -167,14 +165,15 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, EVI->eraseFromParent(); } - if (LPI->getNumUses() == 0) return; + if (LPI->getNumUses() == 0) + return; // There are still some uses of LPI. Construct an aggregate with the exception // values and replace the LPI with that aggregate.
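// A rough C-level view of FunctionContextTy as assembled in
// doInitialization above (field names from the inline comments; the struct
// itself is only illustrative):
//   struct SjLjFunctionContext {
//     void *prev;         // __prev: chain of registered contexts
//     int   call_site;    // call_site: index stored before each invoke
//     int   data[4];      // __data: exception value/selector slots
//     void *personality;  // __personality
//     void *lsda;         // __lsda
//     void *jbuf[5];      // __jbuf: builtin_setjmp's five-word buffer
//   };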
Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> - Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + IRBuilder<> Builder( + llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -183,8 +182,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPrepare:: -setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { +Value *SjLjEHPrepare::setupFunctionContext(Function &F, + ArrayRef<LandingPadInst *> LPads) { BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer @@ -192,9 +191,9 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { // because the value needs to be added to the global context list. const TargetLowering *TLI = TM->getTargetLowering(); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); - FuncCtx = - new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", + EntryBB->begin()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { @@ -205,13 +204,13 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, - "exception_gep"); + Value *ExceptionAddr = + Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, - "exn_selector_gep"); + Value *SelectorAddr = + Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -221,11 +220,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, - "pers_fn_gep"); - Builder.CreateStore(Builder.CreateBitCast(PersonalityFn, - Builder.getInt8PtrTy()), - PersonalityFieldPtr, /*isVolatile=*/true); + Value *PersonalityFieldPtr = + Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Builder.CreateStore( + Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); @@ -245,8 +244,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; - for (Function::arg_iterator - AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have @@ -265,9 
+264,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. - CastInst *NC = - new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", - AfterAllocaInsPt); + CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", + AfterAllocaInsPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. @@ -284,20 +282,21 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { + ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator - BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator - II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { + for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; + ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; - if (Inst->use_empty()) continue; + if (Inst->use_empty()) + continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) continue; + !isa<PHINode>(Inst->use_back())) + continue; // If this is an alloca in the entry block, it's not a real register // value. @@ -306,16 +305,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, continue; // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector<Instruction*, 16> Users; - for (Value::use_iterator - UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { + SmallVector<Instruction *, 16> Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); if (User->getParent() != BB || isa<PHINode>(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. - SmallPtrSet<BasicBlock*, 64> LiveBBs; + SmallPtrSet<BasicBlock *, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -339,7 +338,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " - << UnwindBlock->getName() << "\n"); + << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } @@ -362,15 +361,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. - SmallPtrSet<PHINode*, 8> PHIsToDemote; - for (BasicBlock::iterator - PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) + SmallPtrSet<PHINode *, 8> PHIsToDemote; + for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); - if (PHIsToDemote.empty()) continue; + if (PHIsToDemote.empty()) + continue; // Demote the PHIs to the stack. 
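// Note on the "no-op cast" in lowerIncomingArguments above: after
// AI->replaceAllUsesWith(NC), every user of the argument points at NC,
// including NC itself, so the step elided at the hunk boundary resets NC's
// operand back to AI (as the trailing comment there says). The effect is to
// funnel all uses of the argument through one instruction at a fixed spot
// in the entry block, giving the unwind-edge spill logic a single def to
// work from.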
- for (SmallPtrSet<PHINode*, 8>::iterator - I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(), + E = PHIsToDemote.end(); + I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. @@ -382,9 +382,9 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector<ReturnInst*, 16> Returns; - SmallVector<InvokeInst*, 16> Invokes; - SmallSetVector<LandingPadInst*, 16> LPads; + SmallVector<ReturnInst *, 16> Returns; + SmallVector<InvokeInst *, 16> Invokes; + SmallSetVector<LandingPadInst *, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -404,7 +404,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Returns.push_back(RI); } - if (Invokes.empty()) return false; + if (Invokes.empty()) + return false; NumInvokes += Invokes.size(); @@ -412,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = - setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); IRBuilder<> Builder(EntryBB->getTerminator()); @@ -446,7 +447,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = - ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); + ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. @@ -468,8 +469,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { } // Register the function context and make sure it's known to not throw - CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", - EntryBB->getTerminator()); + CallInst *Register = + CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 209792f..d5b3a4a 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -77,7 +77,7 @@ protected: DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); - assert(li->weight != HUGE_VALF && + assert(li->weight != llvm::huge_valf && "Attempting to spill already spilled value."); assert(!TargetRegisterInfo::isStackSlot(li->reg) && @@ -115,15 +115,14 @@ protected: indices.push_back(i); } - // Create a new vreg & interval for this instr. - LiveInterval *newLI = &LRE.create(); - newLI->weight = HUGE_VALF; + // Create a new virtual register for the load and/or store. + unsigned NewVReg = LRE.create(); // Update the reg operands & kill flags. for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(newLI->reg); + mop.setReg(NewVReg); if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { mop.setIsKill(true); } @@ -133,28 +132,20 @@ protected: // Insert reload if necessary. 
MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc, + MachineInstrSpan MIS(miItr); + + tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc, tri); - MachineInstr *loadInstr(prior(miItr)); - SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); - SlotIndex endIndex = loadIndex.getNextIndex(); - VNInfo *loadVNI = - newLI->getNextValue(loadIndex, lis->getVNInfoAllocator()); - newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); + lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr); } // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg, + MachineInstrSpan MIS(miItr); + + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg, true, ss, trc, tri); - MachineInstr *storeInstr(llvm::next(miItr)); - SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); - SlotIndex beginIndex = storeIndex.getPrevIndex(); - VNInfo *storeVNI = - newLI->getNextValue(beginIndex, lis->getVNInfoAllocator()); - newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); + lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end()); } } } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index e717fac..68a15f7 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -214,7 +214,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { // When not live in, the first use should be a def. if (!BI.LiveIn) { - assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); assert(LVI->start == BI.FirstInstr && "First instr should be a def"); BI.FirstDef = BI.FirstInstr; } @@ -245,8 +245,8 @@ bool SplitAnalysis::calcLiveBlockInfo() { BI.FirstInstr = BI.FirstDef = LVI->start; } - // A LiveRange that starts in the middle of the block must be a def. - assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + // A Segment that starts in the middle of the block must be a def. + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); if (!BI.FirstDef) BI.FirstDef = LVI->start; } @@ -377,7 +377,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); @@ -395,14 +395,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -422,7 +422,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. 
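The rewrite above replaces hand-rolled prior()/next() bookkeeping with a MachineInstrSpan, so that every instruction the target hook inserts can be handed to InsertMachineInstrRangeInMaps in one call. A rough sketch of why such a span works over a doubly linked list, using std::list in place of a basic block (names invented; LLVM's class carries more machinery): iterators to existing nodes survive insertion, so remembering the element just before the insertion point is enough to recover the newly inserted range afterwards.

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    struct Span {
      std::list<std::string> &L;
      std::list<std::string>::iterator Before;
      bool AtFront;
      Span(std::list<std::string> &l, std::list<std::string>::iterator pos)
          : L(l), AtFront(pos == l.begin()) {
        if (!AtFront)
          Before = std::prev(pos);             // stable anchor before pos
      }
      // First newly inserted element, once something was inserted.
      std::list<std::string>::iterator begin() {
        return AtFront ? L.begin() : std::next(Before);
      }
    };

    int main() {
      std::list<std::string> block = {"def", "use"};
      auto use = std::next(block.begin());
      Span span(block, use);                   // like MachineInstrSpan MIS(miItr)
      block.insert(use, "reload");             // like loadRegFromStackSlot(...)
      for (auto it = span.begin(); it != use; ++it)
        std::cout << "index new instr: " << *it << "\n";  // only "reload"
    }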
SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -434,7 +435,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, MachineBasicBlock::iterator I) { MachineInstr *CopyMI = 0; SlotIndex Def; - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); // We may be trying to avoid interference that ends at a deleted instruction, // so always begin RegIdx 0 early and all others late. @@ -462,11 +463,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(); + Edit->createEmptyInterval(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(); + Edit->createEmptyInterval(); return OpenIdx; } @@ -631,7 +632,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { //===----------------------------------------------------------------------===// void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { - LiveInterval *LI = Edit->get(0); + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); RegAssignMap::iterator AssignI; AssignI.setMap(RegAssign); @@ -730,7 +731,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, void SplitEditor::hoistCopiesForSize() { // Get the complement interval, always RegIdx 0. - LiveInterval *LI = Edit->get(0); + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); LiveInterval *Parent = &Edit->getParent(); // Track the nearest common dominator for all back-copies for each ParentVNI, @@ -861,13 +862,13 @@ bool SplitEditor::transferValues() { // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); - LiveInterval *LI = Edit->get(RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); - LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; } @@ -891,7 +892,7 @@ bool SplitEditor::transferValues() { // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { - VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? @@ -911,7 +912,7 @@ bool SplitEditor::transferValues() { if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) LRC.setLiveOutValue(MBB, VNI); // Live-out as well. @@ -919,10 +920,10 @@ bool SplitEditor::transferValues() { // This block needs a live-in value. 
The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LI, MDT[MBB], End); + LRC.addLiveInBlock(LR, MDT[MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LI, MDT[MBB]); + LRC.addLiveInBlock(LR, MDT[MBB]); LRC.setLiveOutValue(MBB, 0); } } @@ -949,7 +950,7 @@ void SplitEditor::extendPHIKillRanges() { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveInterval *LI = Edit->get(RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); LiveRangeCalc &LRC = getLRCalc(RegIdx); MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), @@ -961,7 +962,7 @@ void SplitEditor::extendPHIKillRanges() { if (Edit->getParent().liveAt(LastUse)) { assert(RegAssign.lookup(LastUse) == RegIdx && "Different register assignment in phi predecessor"); - LRC.extend(LI, End); + LRC.extend(LR, End); } } } @@ -990,7 +991,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); MO.setReg(LI->reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1011,14 +1012,14 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(LI, Idx.getNextSlot()); + getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); } } void SplitEditor::deleteRematVictims() { SmallVector<MachineInstr*, 8> Dead; for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ - LiveInterval *LI = *I; + LiveInterval *LI = &LIS.getInterval(*I); for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { // Dead defs end at the dead slot. @@ -1091,8 +1092,10 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) - (*I)->RenumberValues(LIS); + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + LI.RenumberValues(); + } // Provide a reverse mapping from original indices to Edit ranges. if (LRMap) { @@ -1105,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = Edit->get(i); + LiveInterval *li = &LIS.getInterval(Edit->get(i)); unsigned NumComp = ConEQ.Classify(li); if (NumComp <= 1) continue; @@ -1113,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { SmallVector<LiveInterval*, 8> dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create()); + dups.push_back(&Edit->createEmptyInterval()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index faaa6e7..3dbc050 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -170,7 +170,7 @@ private: /// slots to use the joint slots. 
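A pattern running through all of the SplitKit hunks: Edit->get(RegIdx) now yields a virtual-register number that is resolved through LIS.getInterval() at each use, and create() became createEmptyInterval(). Holding integer handles instead of cached LiveInterval pointers matters because creating an interval can invalidate outstanding pointers, the same hazard the comment above ("Don't use iterators, they are invalidated by create() below") calls out. A toy illustration of the design choice, all names invented:

    #include <cstdio>
    #include <vector>

    struct Interval { unsigned reg; /* segments would live here */ };

    class LiveIntervalsSketch {
      std::vector<Interval> Storage;   // growth can move every element
    public:
      unsigned createEmptyInterval() {
        Storage.push_back(Interval{(unsigned)Storage.size()});
        return Storage.back().reg;     // hand out the index, not a pointer
      }
      Interval &getInterval(unsigned reg) { return Storage[reg]; }
    };

    int main() {
      LiveIntervalsSketch LIS;
      unsigned R0 = LIS.createEmptyInterval();
      // A pointer cached here could dangle after the next create(), because
      // push_back may reallocate; the handle R0 stays valid.
      unsigned R1 = LIS.createEmptyInterval();
      std::printf("%u %u\n", LIS.getInterval(R0).reg, LIS.getInterval(R1).reg);
    }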
void remapInstructions(DenseMap<int, int> &SlotRemap); - /// The input program may contain intructions which are not inside lifetime + /// The input program may contain instructions which are not inside lifetime /// markers. This can happen due to a bug in the compiler or due to a bug in /// user code (for example, returning a reference to a local variable). /// This procedure checks all of the instructions in the function and @@ -450,14 +450,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex F = Finishes[i]; if (S < F) { // We have a single consecutive region. - Intervals[i]->addRange(LiveRange(S, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); } else { // We have two non consecutive regions. This happens when // LIFETIME_START appears after the LIFETIME_END marker. SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); - Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); - Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); } } } @@ -763,7 +763,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Merge disjoint slots. if (!First->overlaps(*Second)) { Changed = true; - First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); + First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp new file mode 100644 index 0000000..40893ea --- /dev/null +++ b/lib/CodeGen/StackMaps.cpp @@ -0,0 +1,314 @@ +//===---------------------------- StackMaps.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackmaps"
+
+#include "llvm/CodeGen/StackMaps.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <iterator>
+
+using namespace llvm;
+
+PatchPointOpers::PatchPointOpers(const MachineInstr *MI):
+  MI(MI),
+  HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+         !MI->getOperand(0).isImplicit()),
+  IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) {
+
+#ifndef NDEBUG
+  {
+    unsigned CheckStartIdx = 0, e = MI->getNumOperands();
+    while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
+           MI->getOperand(CheckStartIdx).isDef() &&
+           !MI->getOperand(CheckStartIdx).isImplicit())
+      ++CheckStartIdx;
+
+    assert(getMetaIdx() == CheckStartIdx &&
+           "Unexpected additional definition in Patchpoint intrinsic.");
+  }
+#endif
+}
+
+unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
+  if (!StartIdx)
+    StartIdx = getVarIdx();
+
+  // Find the next scratch register (implicit def and early clobber)
+  unsigned ScratchIdx = StartIdx, e = MI->getNumOperands();
+  while (ScratchIdx < e &&
+         !(MI->getOperand(ScratchIdx).isReg() &&
+           MI->getOperand(ScratchIdx).isDef() &&
+           MI->getOperand(ScratchIdx).isImplicit() &&
+           MI->getOperand(ScratchIdx).isEarlyClobber()))
+    ++ScratchIdx;
+
+  assert(ScratchIdx != e && "No scratch register available");
+  return ScratchIdx;
+}
+
+void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
+                                    MachineInstr::const_mop_iterator MOI,
+                                    MachineInstr::const_mop_iterator MOE,
+                                    bool recordResult) {
+
+  MCContext &OutContext = AP.OutStreamer.getContext();
+  MCSymbol *MILabel = OutContext.CreateTempSymbol();
+  AP.OutStreamer.EmitLabel(MILabel);
+
+  LocationVec CallsiteLocs;
+
+  if (recordResult) {
+    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+      OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM);
+
+    Location &Loc = ParseResult.first;
+    assert(Loc.LocType == Location::Register &&
+           "Stackmap return location must be a register.");
+    CallsiteLocs.push_back(Loc);
+  }
+
+  while (MOI != MOE) {
+    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
+      OpParser(MOI, MOE, AP.TM);
+
+    Location &Loc = ParseResult.first;
+
+    // Move large constants into the constant pool.
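The comment closing the hunk above introduces the rewrite at the top of the next hunk: a stack map location's offset field is a signed 32-bit slot, so a constant needing more than 32 bits is moved into the section's constant pool and the location records its index instead. A standalone sketch of that scheme with a deduplicating pool (illustrative names only):

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <vector>

    // Deduplicating side table: identical constants share one pool slot.
    class ConstantPoolSketch {
      std::map<int64_t, uint32_t> Index;
      std::vector<int64_t> Constants;
    public:
      uint32_t getConstantIndex(int64_t C) {
        auto It = Index.find(C);
        if (It != Index.end())
          return It->second;                    // reuse existing entry
        Index[C] = (uint32_t)Constants.size();
        Constants.push_back(C);
        return (uint32_t)Constants.size() - 1;
      }
      size_t size() const { return Constants.size(); }
    };

    int main() {
      ConstantPoolSketch Pool;
      uint64_t Offset = 0x1122334455667788ULL;
      if (Offset & ~0xFFFFFFFFULL)              // same width test as the hunk
        Offset = Pool.getConstantIndex((int64_t)Offset);
      std::printf("encoded offset %llu, pool entries %zu\n",
                  (unsigned long long)Offset, Pool.size());
    }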
+    if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) {
+      Loc.LocType = Location::ConstantIndex;
+      Loc.Offset = ConstPool.getConstantIndex(Loc.Offset);
+    }
+
+    CallsiteLocs.push_back(Loc);
+    MOI = ParseResult.second;
+  }
+
+  const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub(
+    MCSymbolRefExpr::Create(MILabel, OutContext),
+    MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
+    OutContext);
+
+  CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs));
+}
+
+static MachineInstr::const_mop_iterator
+getStackMapEndMOP(MachineInstr::const_mop_iterator MOI,
+                  MachineInstr::const_mop_iterator MOE) {
+  for (; MOI != MOE; ++MOI)
+    if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit()))
+      break;
+
+  return MOI;
+}
+
+void StackMaps::recordStackMap(const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
+
+  int64_t ID = MI.getOperand(0).getImm();
+  assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+  recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2),
+                      getStackMapEndMOP(MI.operands_begin(),
+                                        MI.operands_end()));
+}
+
+void StackMaps::recordPatchPoint(const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
+
+  PatchPointOpers opers(&MI);
+  int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
+  assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+  MachineInstr::const_mop_iterator MOI =
+    llvm::next(MI.operands_begin(), opers.getStackMapStartIdx());
+  recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()),
+                      opers.isAnyReg() && opers.hasDef());
+
+#ifndef NDEBUG
+  // verify anyregcc
+  LocationVec &Locations = CSInfos.back().Locations;
+  if (opers.isAnyReg()) {
+    unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
+    for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i)
+      assert(Locations[i].LocType == Location::Register &&
+             "anyreg arg must be in reg.");
+  }
+#endif
+}
+
+/// serializeToStackMapSection conceptually populates the following fields:
+///
+/// uint32 : Reserved (header)
+/// uint32 : NumConstants
+/// int64  : Constants[NumConstants]
+/// uint32 : NumRecords
+/// StkMapRecord[NumRecords] {
+///   uint32 : PatchPoint ID
+///   uint32 : Instruction Offset
+///   uint16 : Reserved (record flags)
+///   uint16 : NumLocations
+///   Location[NumLocations] {
+///     uint8  : Register | Direct | Indirect | Constant | ConstantIndex
+///     uint8  : Size in Bytes
+///     uint16 : Dwarf RegNum
+///     int32  : Offset
+///   }
+/// }
+///
+/// Location Encoding, Type, Value:
+///   0x1, Register, Reg                 (value in register)
+///   0x2, Direct, Reg + Offset          (frame index)
+///   0x3, Indirect, [Reg + Offset]      (spilled value)
+///   0x4, Constant, Offset              (small constant)
+///   0x5, ConstIndex, Constants[Offset] (large constant)
+///
+void StackMaps::serializeToStackMapSection() {
+  // Bail out if there's no stack map data.
+  if (CSInfos.empty())
+    return;
+
+  MCContext &OutContext = AP.OutStreamer.getContext();
+  const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+
+  // Create the section.
+  const MCSection *StackMapSection =
+    OutContext.getObjectFileInfo()->getStackMapSection();
+  AP.OutStreamer.SwitchSection(StackMapSection);
+
+  // Emit a dummy symbol to force section inclusion.
+  AP.OutStreamer.EmitLabel(
+    OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps")));
+
+  // Serialize data.
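Before the emission loops that follow, the layout documented above can be made concrete. A self-contained sketch that packs the same header, constant pool, and one record carrying a single Register location into a byte buffer; little-endian writes are assumed for simplicity, whereas the real emitter goes through the MCStreamer with the target's byte order:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static void emit(std::vector<uint8_t> &Out, uint64_t V, unsigned Bytes) {
      for (unsigned i = 0; i < Bytes; ++i)
        Out.push_back(uint8_t(V >> (8 * i)));  // little-endian write
    }

    int main() {
      std::vector<uint8_t> Section;
      emit(Section, 0, 4);                     // uint32 : Reserved (header)
      emit(Section, 1, 4);                     // uint32 : NumConstants
      emit(Section, 0x1122334455667788ULL, 8); // int64  : Constants[0]
      emit(Section, 1, 4);                     // uint32 : NumRecords
      // One StkMapRecord with a single Register location:
      emit(Section, 42, 4);                    // uint32 : PatchPoint ID
      emit(Section, 0x10, 4);                  // uint32 : Instruction Offset
      emit(Section, 0, 2);                     // uint16 : Reserved (flags)
      emit(Section, 1, 2);                     // uint16 : NumLocations
      emit(Section, 0x1, 1);                   // uint8  : Register
      emit(Section, 8, 1);                     // uint8  : Size in Bytes
      emit(Section, 5, 2);                     // uint16 : Dwarf RegNum
      emit(Section, 0, 4);                     // int32  : Offset
      std::printf("section is %zu bytes\n", Section.size()); // 40
    }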
+ const char *WSMP = "Stack Maps: "; + (void)WSMP; + const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo(); + + DEBUG(dbgs() << "********** Stack Map Output **********\n"); + + // Header. + AP.OutStreamer.EmitIntValue(0, 4); + + // Num constants. + AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4); + + // Constant pool entries. + for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i) + AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8); + + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n"); + AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); + + for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(), + CSIE = CSInfos.end(); + CSII != CSIE; ++CSII) { + + unsigned CallsiteID = CSII->ID; + const LocationVec &CSLocs = CSII->Locations; + + DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); + + // Verify stack map entry. It's better to communicate a problem to the + // runtime than crash in case of in-process compilation. Currently, we do + // simple overflow checks, but we may eventually communicate other + // compilation errors this way. + if (CSLocs.size() > UINT16_MAX) { + AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID. + AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + AP.OutStreamer.EmitIntValue(0, 2); // Reserved. + AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. + continue; + } + + AP.OutStreamer.EmitIntValue(CallsiteID, 4); + AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + + // Reserved for flags. + AP.OutStreamer.EmitIntValue(0, 2); + + DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); + + AP.OutStreamer.EmitIntValue(CSLocs.size(), 2); + + unsigned operIdx = 0; + for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); + LocI != LocE; ++LocI, ++operIdx) { + const Location &Loc = *LocI; + DEBUG( + dbgs() << WSMP << " Loc " << operIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + dbgs() << "<Unprocessed operand>"; + break; + case Location::Register: + dbgs() << "Register " << MCRI.getName(Loc.Reg); + break; + case Location::Direct: + dbgs() << "Direct " << MCRI.getName(Loc.Reg); + if (Loc.Offset) + dbgs() << " + " << Loc.Offset; + break; + case Location::Indirect: + dbgs() << "Indirect " << MCRI.getName(Loc.Reg) + << " + " << Loc.Offset; + break; + case Location::Constant: + dbgs() << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + dbgs() << "Constant Index " << Loc.Offset; + break; + } + dbgs() << "\n"; + ); + + unsigned RegNo = 0; + int Offset = Loc.Offset; + if(Loc.Reg) { + RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); + for (MCSuperRegIterator SR(Loc.Reg, TRI); + SR.isValid() && (int)RegNo < 0; ++SR) { + RegNo = TRI->getDwarfRegNum(*SR, false); + } + // If this is a register location, put the subregister byte offset in + // the location offset. 
+ if (Loc.LocType == Location::Register) { + assert(!Loc.Offset && "Register location should have zero offset"); + unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); + if (SubRegIdx) + Offset = MCRI.getSubRegIdxOffset(SubRegIdx); + } + } + else { + assert(Loc.LocType != Location::Register && + "Missing location register"); + } + AP.OutStreamer.EmitIntValue(Loc.LocType, 1); + AP.OutStreamer.EmitIntValue(Loc.Size, 1); + AP.OutStreamer.EmitIntValue(RegNo, 2); + AP.OutStreamer.EmitIntValue(Offset, 4); + } + } + + AP.OutStreamer.AddBlankLine(); + + CSInfos.clear(); +} diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 4c56380..9020449 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -15,11 +15,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Triple.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -27,12 +29,12 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" #include <cstdlib> using namespace llvm; @@ -40,137 +42,93 @@ STATISTIC(NumFunProtected, "Number of functions protected"); STATISTIC(NumAddrTaken, "Number of local variables that have their address" " taken."); -namespace { - class StackProtector : public FunctionPass { - const TargetMachine *TM; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// target type sizes. - const TargetLoweringBase *TLI; - const Triple Trip; - - Function *F; - Module *M; - - DominatorTree *DT; - - /// \brief The minimum size of buffers that will receive stack smashing - /// protection when -fstack-protection is used. - unsigned SSPBufferSize; - - /// VisitedPHIs - The set of PHI nodes visited when determining - /// if a variable's reference has been taken. This set - /// is maintained to ensure we don't visit the same PHI node multiple - /// times. - SmallPtrSet<const PHINode*, 16> VisitedPHIs; - - /// InsertStackProtectors - Insert code into the prologue and epilogue of - /// the function. - /// - /// - The prologue code loads and stores the stack guard onto the stack. - /// - The epilogue checks the value stored in the prologue against the - /// original value. It calls __stack_chk_fail if they differ. - bool InsertStackProtectors(); - - /// CreateFailBB - Create a basic block to jump to when the stack protector - /// check fails. - BasicBlock *CreateFailBB(); - - /// ContainsProtectableArray - Check whether the type either is an array or - /// contains an array of sufficient size so that we need stack protectors - /// for it. - bool ContainsProtectableArray(Type *Ty, bool Strong = false, - bool InStruct = false) const; - - /// \brief Check whether a stack allocation has its address taken. 
- bool HasAddressTaken(const Instruction *AI); - - /// RequiresStackProtector - Check whether or not this function needs a - /// stack protector based upon the stack protector level. - bool RequiresStackProtector(); - public: - static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID), TM(0), TLI(0), SSPBufferSize(0) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - StackProtector(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), TLI(0), Trip(TM->getTargetTriple()), - SSPBufferSize(8) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTree>(); - } - - virtual bool runOnFunction(Function &Fn); - }; -} // end anonymous namespace +static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", + cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_PASS(StackProtector, "stack-protector", - "Insert stack protectors", false, false) +INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", + false, true) FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { return new StackProtector(TM); } +StackProtector::SSPLayoutKind +StackProtector::getSSPLayout(const AllocaInst *AI) const { + return AI ? Layout.lookup(AI) : SSPLK_None; +} + bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DT = getAnalysisIfAvailable<DominatorTree>(); TLI = TM->getTargetLowering(); - if (!RequiresStackProtector()) return false; + if (!RequiresStackProtector()) + return false; - Attribute Attr = - Fn.getAttributes().getAttribute(AttributeSet::FunctionIndex, - "stack-protector-buffer-size"); + Attribute Attr = Fn.getAttributes().getAttribute( + AttributeSet::FunctionIndex, "stack-protector-buffer-size"); if (Attr.isStringAttribute()) - SSPBufferSize = atoi(Attr.getValueAsString().data()); + Attr.getValueAsString().getAsInteger(10, SSPBufferSize); ++NumFunProtected; return InsertStackProtectors(); } -/// ContainsProtectableArray - Check whether the type either is an array or -/// contains a char array of sufficient size so that we need stack protectors -/// for it. -bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, +/// \param [out] IsLarge is set to true if a protectable array is found and +/// it is "large" ( >= ssp-buffer-size). In the case of a structure with +/// multiple arrays, this gets set if any of them is large. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, + bool Strong, bool InStruct) const { - if (!Ty) return false; + if (!Ty) + return false; if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { - // In strong mode any array, regardless of type and size, triggers a - // protector - if (Strong) - return true; if (!AT->getElementType()->isIntegerTy(8)) { // If we're on a non-Darwin platform or we're inside of a structure, don't // add stack protectors unless the array is a character array. - if (InStruct || !Trip.isOSDarwin()) - return false; + // However, in strong mode any array, regardless of type and size, + // triggers a protector. + if (!Strong && (InStruct || !Trip.isOSDarwin())) + return false; } // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. 
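One small behavioural change in runOnFunction above: the "stack-protector-buffer-size" attribute string is now parsed with StringRef::getAsInteger rather than atoi, so a malformed value is detected (getAsInteger returns true on failure and leaves the output alone) instead of being silently truncated, and the possibly unterminated StringRef data never reaches a C string function. A standalone analogue of that failure-reporting convention, built on strtoul (all names invented):

    #include <cerrno>
    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Returns true on failure; on failure the output parameter is left
    // untouched so the caller's default survives.
    bool parseUnsigned(const std::string &S, unsigned &Out) {
      if (S.empty())
        return true;
      errno = 0;
      char *End = nullptr;
      unsigned long V = std::strtoul(S.c_str(), &End, 10);
      if (errno != 0 || *End != '\0')
        return true;                    // overflow or trailing junk
      Out = (unsigned)V;
      return false;
    }

    int main() {
      unsigned SSPBufferSize = 8;       // default kept if the parse fails
      parseUnsigned("16", SSPBufferSize);
      std::printf("ssp buffer size = %u\n", SSPBufferSize);
      if (parseUnsigned("16zzz", SSPBufferSize)) // atoi() would yield 16 here
        std::printf("malformed attribute rejected\n");
    }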
- if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) { + IsLarge = true; + return true; + } + + if (Strong) + // Require a protector for all arrays in strong mode return true; } const StructType *ST = dyn_cast<StructType>(Ty); - if (!ST) return false; + if (!ST) + return false; + bool NeedsProtector = false; for (StructType::element_iterator I = ST->element_begin(), - E = ST->element_end(); I != E; ++I) - if (ContainsProtectableArray(*I, Strong, true)) - return true; + E = ST->element_end(); + I != E; ++I) + if (ContainsProtectableArray(*I, IsLarge, Strong, true)) { + // If the element is a protectable array and is large (>= SSPBufferSize) + // then we are done. If the protectable array is not large, then + // keep looking in case a subsequent element is a large array. + if (IsLarge) + return true; + NeedsProtector = true; + } - return false; + return NeedsProtector; } bool StackProtector::HasAddressTaken(const Instruction *AI) { for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { const User *U = *UI; if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (AI == SI->getValueOperand()) @@ -217,11 +175,13 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { /// address taken. bool StackProtector::RequiresStackProtector() { bool Strong = false; + bool NeedsProtector = false; if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) - return true; - else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + Attribute::StackProtectReq)) { + NeedsProtector = true; + Strong = true; // Use the same heuristic as strong to determine SSPLayout + } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) Strong = true; else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackProtect)) @@ -230,39 +190,116 @@ bool StackProtector::RequiresStackProtector() { for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; - for (BasicBlock::iterator - II = BB->begin(), IE = BB->end(); II != IE; ++II) { + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (AI->isArrayAllocation()) { // SSP-Strong: Enable protectors for any call to alloca, regardless // of size. if (Strong) return true; - + if (const ConstantInt *CI = - dyn_cast<ConstantInt>(AI->getArraySize())) { - if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) + dyn_cast<ConstantInt>(AI->getArraySize())) { + if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. - return true; + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } else if (Strong) { + // Require protectors for all alloca calls in strong mode. + Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); + NeedsProtector = true; + } } else { // A call to alloca with a variable size requires protectors. - return true; + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; } + continue; } - if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) - return true; + bool IsLarge = false; + if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { + Layout.insert(std::make_pair(AI, IsLarge ? 
SSPLK_LargeArray
+                                         : SSPLK_SmallArray));
+        NeedsProtector = true;
+        continue;
+      }

       if (Strong && HasAddressTaken(AI)) {
-          ++NumAddrTaken;
-          return true;
+          ++NumAddrTaken;
+          Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+          NeedsProtector = true;
       }
      }
    }
  }

-  return false;
+  return NeedsProtector;
+}
+
+static bool InstructionWillNotHaveChain(const Instruction *I) {
+  return !I->mayHaveSideEffects() && !I->mayReadFromMemory() &&
+         isSafeToSpeculativelyExecute(I);
+}
+
+/// Identify if RI has a previous instruction in the "Tail Position" and return
+/// it. Otherwise return 0.
+///
+/// This is based off of the code in llvm::isInTailCallPosition. The difference
+/// is that it inverts the first part of llvm::isInTailCallPosition since
+/// isInTailCallPosition is checking if a call is in a tail call position, and
+/// we are searching for an unknown tail call that might be in the tail call
+/// position. Once we find the call though, the code uses the same refactored
+/// code, returnTypeIsEligibleForTailCall.
+static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI,
+                                       const TargetLoweringBase *TLI) {
+  // Establish a reasonable upper bound on the maximum amount of instructions we
+  // will look through to find a tail call.
+  unsigned SearchCounter = 0;
+  const unsigned MaxSearch = 4;
+  bool NoInterposingChain = true;
+
+  for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()),
+                                    E = BB->rend();
+       I != E && SearchCounter < MaxSearch; ++I) {
+    Instruction *Inst = &*I;
+
+    // Skip over debug intrinsics and do not allow them to affect our MaxSearch
+    // counter.
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    // If we find a call and the following conditions are satisfied, then we
+    // have found a tail call that satisfies at least the target independent
+    // requirements of a tail call:
+    //
+    // 1. The call site has the tail marker.
+    //
+    // 2. The call site either will not cause the creation of a chain or if a
+    // chain is necessary there are no instructions in between the callsite and
+    // the call which would create an interposing chain.
+    //
+    // 3. The return type of the function does not impede tail call
+    // optimization.
+    if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
+      if (CI->isTailCall() &&
+          (InstructionWillNotHaveChain(CI) || NoInterposingChain) &&
+          returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI))
+        return CI;
+    }
+
+    // If we did not find a call see if we have an instruction that may create
+    // an interposing chain.
+    NoInterposingChain =
+        NoInterposingChain && InstructionWillNotHaveChain(Inst);
+
+    // Increment max search.
+    SearchCounter++;
+  }
+
+  return 0;
+}

 /// Insert code into the entry block that stores the __stack_chk_guard
@@ -273,36 +310,36 @@ bool StackProtector::RequiresStackProtector() {
 /// StackGuard = load __stack_chk_guard
 /// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
 ///
-static void CreatePrologue(Function *F, Module *M, ReturnInst *RI,
+/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
+/// node.
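FindPotentialTailCall above bounds its backward walk: at most MaxSearch (4) real instructions are examined before the return, debug intrinsics are skipped without charging the budget, and NoInterposingChain accumulates whether anything stepped over could force a chain. A toy model of the same scan, with struct fields standing in for isTailCall(), DbgInfoIntrinsic, and InstructionWillNotHaveChain, and with the return-type eligibility check omitted; the rewritten CreatePrologue, whose updated documentation closes the hunk above, follows next.

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Inst {
      std::string name;
      bool isDebug;        // DbgInfoIntrinsic stand-in
      bool isTailCall;     // CallInst::isTailCall() stand-in
      bool needsChain;     // !InstructionWillNotHaveChain stand-in
    };

    const Inst *findPotentialTailCall(const std::vector<Inst> &BB) {
      const unsigned MaxSearch = 4;
      unsigned SearchCounter = 0;
      bool NoInterposingChain = true;
      // rbegin() + 1 steps over the terminator, as the pass skips the ret.
      for (auto I = BB.rbegin() + 1, E = BB.rend();
           I != E && SearchCounter < MaxSearch; ++I) {
        if (I->isDebug)
          continue;                              // no charge to the budget
        if (I->isTailCall && (!I->needsChain || NoInterposingChain))
          return &*I;
        NoInterposingChain = NoInterposingChain && !I->needsChain;
        ++SearchCounter;
      }
      return nullptr;
    }

    int main() {
      std::vector<Inst> BB = {{"tail call @foo", false, true, true},
                              {"llvm.dbg.value", true, false, false},
                              {"ret", false, false, false}};
      if (const Inst *CI = findPotentialTailCall(BB))
        std::printf("candidate: %s\n", CI->name.c_str());
    }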
+static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, const TargetLoweringBase *TLI, const Triple &Trip, AllocaInst *&AI, Value *&StackGuardVar) { + bool SupportsSelectionDAGSP = false; PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = - ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - - StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, - PointerType::get(PtrTy, - AddressSpace)); + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr( + OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else if (Trip.getOS() == llvm::Triple::OpenBSD) { StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); cast<GlobalValue>(StackGuardVar) - ->setVisibility(GlobalValue::HiddenVisibility); + ->setVisibility(GlobalValue::HiddenVisibility); } else { + SupportsSelectionDAGSP = true; StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } - - BasicBlock &Entry = F->getEntryBlock(); - Instruction *InsPt = &Entry.front(); - - AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); - LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); - - Value *Args[] = { LI, AI }; - CallInst:: - Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - Args, "", InsPt); + + IRBuilder<> B(&F->getEntryBlock().front()); + AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot"); + LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); + B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, + AI); + + return SupportsSelectionDAGSP; } /// InsertStackProtectors - Insert code into the prologue and epilogue of the @@ -312,72 +349,102 @@ static void CreatePrologue(Function *F, Module *M, ReturnInst *RI, /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { - BasicBlock *FailBB = 0; // The basic block to jump to if check fails. - BasicBlock *FailBBDom = 0; // FailBB's dominator. - AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Value *StackGuardVar = 0; // The stack guard variable. + bool HasPrologue = false; + bool SupportsSelectionDAGSP = + EnableSelectionDAGSP && !TM->Options.EnableFastISel; + AllocaInst *AI = 0; // Place on stack that stores the stack guard. + Value *StackGuardVar = 0; // The stack guard variable. - for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = I++; ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); - if (!RI) continue; + if (!RI) + continue; - if (!FailBB) { - CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); - // Create the basic block to jump to when the guard check fails. - FailBB = CreateFailBB(); + if (!HasPrologue) { + HasPrologue = true; + SupportsSelectionDAGSP &= + CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); } - // For each block with a return instruction, convert this: - // - // return: - // ... - // ret ... - // - // into this: - // - // return: - // ... - // %1 = load __stack_chk_guard - // %2 = load StackGuardSlot - // %3 = cmp i1 %1, %2 - // br i1 %3, label %SP_return, label %CallStackCheckFailBlk - // - // SP_return: - // ret ... 
- // - // CallStackCheckFailBlk: - // call void @__stack_chk_fail() - // unreachable - - // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + if (SupportsSelectionDAGSP) { + // Since we have a potential tail call, insert the special stack check + // intrinsic. + Instruction *InsertionPt = 0; + if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { + InsertionPt = CI; + } else { + InsertionPt = RI; + // At this point we know that BB has a return statement so it *DOES* + // have a terminator. + assert(InsertionPt != 0 && "BB must have a terminator instruction at " + "this point."); + } - if (DT && DT->isReachableFromEntry(BB)) { - DT->addNewBlock(NewBB, BB); - FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB; - } + Function *Intrinsic = + Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); + CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); + + } else { + // If we do not support SelectionDAG based tail calls, generate IR level + // tail calls. + // + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Create the FailBB. We duplicate the BB every time since the MI tail + // merge pass will merge together all of the various BB into one including + // fail BB generated by the stack protector pseudo instruction. + BasicBlock *FailBB = CreateFailBB(); + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + // Update the dominator tree if we need to. + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + DT->addNewBlock(FailBB, BB); + } - // Remove default branch instruction to the new BB. - BB->getTerminator()->eraseFromParent(); + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); - // Move the newly created basic block to the point right after the old basic - // block so that it's in the "fall through" position. - NewBB->moveAfter(BB); + // Move the newly created basic block to the point right after the old + // basic block so that it's in the "fall through" position. + NewBB->moveAfter(BB); - // Generate the stack protector instructions in the old basic block. - LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); - LoadInst *LI2 = new LoadInst(AI, "", true, BB); - ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); - BranchInst::Create(NewBB, FailBB, Cmp, BB); + // Generate the stack protector instructions in the old basic block. + IRBuilder<> B(BB); + LoadInst *LI1 = B.CreateLoad(StackGuardVar); + LoadInst *LI2 = B.CreateLoad(AI); + Value *Cmp = B.CreateICmpEQ(LI1, LI2); + B.CreateCondBr(Cmp, NewBB, FailBB); + } } // Return if we didn't modify any basic blocks. I.e., there are no return // statements in the function. 
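On the IR-level path above, every return block ends up performing the same four steps: load the guard global, load the slot the prologue wrote, compare, and branch to the fail block on mismatch. The same check written out by hand in ordinary C++, as a rough model of what the instrumented function computes at run time; the guard value and handler here are stand-ins for the runtime's __stack_chk_guard and __stack_chk_fail, and whether an overflow actually reaches the slot depends on stack layout.

    #include <cstdio>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    uintptr_t guard_value = 0xA5A5A5A5u;      // stand-in for __stack_chk_guard

    void guard_fail() {                       // stand-in for __stack_chk_fail
      std::fprintf(stderr, "stack smashing detected\n");
      std::abort();                           // never returns: "unreachable"
    }

    void demo(const char *src, size_t n) {
      uintptr_t guard_slot = guard_value;     // prologue: store guard in slot
      char buf[8];
      std::memcpy(buf, src, n);               // n > 8 may clobber guard_slot
      if (guard_slot != guard_value)          // epilogue: %3 = cmp i1 %1, %2
        guard_fail();                         // br ... %CallStackCheckFailBlk
      // falling through here is the SP_return path
    }

    int main() { demo("hi", 3); }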
- if (!FailBB) return false; - - if (DT && FailBBDom) - DT->addNewBlock(FailBB, FailBBDom); + if (!HasPrologue) + return false; return true; } @@ -387,29 +454,18 @@ bool StackProtector::InsertStackProtectors() { BasicBlock *StackProtector::CreateFailBB() { LLVMContext &Context = F->getContext(); BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); + IRBuilder<> B(FailBB); if (Trip.getOS() == llvm::Triple::OpenBSD) { Constant *StackChkFail = M->getOrInsertFunction( "__stack_smash_handler", Type::getVoidTy(Context), Type::getInt8PtrTy(Context), NULL); - Constant *NameStr = ConstantDataArray::getString(Context, F->getName()); - Constant *FuncName = - new GlobalVariable(*M, NameStr->getType(), true, - GlobalVariable::PrivateLinkage, NameStr, "SSH"); - - SmallVector<Constant *, 2> IdxList; - IdxList.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0)); - IdxList.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0)); - - SmallVector<Value *, 1> Args; - Args.push_back(ConstantExpr::getGetElementPtr(FuncName, IdxList)); - - CallInst::Create(StackChkFail, Args, "", FailBB); + B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); } else { Constant *StackChkFail = M->getOrInsertFunction( "__stack_chk_fail", Type::getVoidTy(Context), NULL); - CallInst::Create(StackChkFail, "", FailBB); + B.CreateCall(StackChkFail); } - new UnreachableInst(Context, FailBB); + B.CreateUnreachable(); return FailBB; } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp deleted file mode 100644 index b337c53..0000000 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ /dev/null @@ -1,825 +0,0 @@ -//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass eliminates PHI instructions by aggressively coalescing the copies -// that would be inserted by a naive algorithm and only inserting the copies -// that are necessary. The coalescing technique initially assumes that all -// registers appearing in a PHI instruction do not interfere. It then eliminates -// proven interferences, using dominators to only perform a linear number of -// interference tests instead of the quadratic number of interference tests -// that this would naively require. This is a technique derived from: -// -// Budimlic, et al. Fast copy coalescing and live-range identification. -// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language -// Design and Implementation (Berlin, Germany, June 17 - 19, 2002). -// PLDI '02. ACM, New York, NY, 25-32. -// -// The original implementation constructs a data structure they call a dominance -// forest for this purpose. The dominance forest was shown to be unnecessary, -// as it is possible to emulate the creation and traversal of a dominance forest -// by directly using the dominator tree, rather than actually constructing the -// dominance forest. This technique is explained in: -// -// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code -// Quality and Efficiency, -// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code -// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). -// CGO '09. IEEE, Washington, DC, 114-125. 
-// -// Careful implementation allows for all of the dominator forest interference -// checks to be performed at once in a single depth-first traversal of the -// dominator tree, which is what is implemented here. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "strongphielim" -#include "llvm/CodeGen/Passes.h" -#include "PHIEliminationUtils.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -using namespace llvm; - -namespace { - class StrongPHIElimination : public MachineFunctionPass { - public: - static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(ID) { - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage&) const; - bool runOnMachineFunction(MachineFunction&); - - private: - /// This struct represents a single node in the union-find data structure - /// representing the variable congruence classes. There is one difference - /// from a normal union-find data structure. We steal two bits from the parent - /// pointer . One of these bits is used to represent whether the register - /// itself has been isolated, and the other is used to represent whether the - /// PHI with that register as its destination has been isolated. - /// - /// Note that this leads to the strange situation where the leader of a - /// congruence class may no longer logically be a member, due to being - /// isolated. - struct Node { - enum Flags { - kRegisterIsolatedFlag = 1, - kPHIIsolatedFlag = 2 - }; - Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } - - Node *getLeader(); - - PointerIntPair<Node*, 2> parent; - unsigned value; - unsigned rank; - }; - - /// Add a register in a new congruence class containing only itself. - void addReg(unsigned); - - /// Join the congruence classes of two registers. This function is biased - /// towards the left argument, i.e. after - /// - /// addReg(r2); - /// unionRegs(r1, r2); - /// - /// the leader of the unioned congruence class is the same as the leader of - /// r1's congruence class prior to the union. This is actually relied upon - /// in the copy insertion code. - void unionRegs(unsigned, unsigned); - - /// Get the color of a register. The color is 0 if the register has been - /// isolated. - unsigned getRegColor(unsigned); - - // Isolate a register. - void isolateReg(unsigned); - - /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been - /// isolated. Otherwise, it is the original color of its destination and - /// all of its operands (before they were isolated, if they were). - unsigned getPHIColor(MachineInstr*); - - /// Isolate a PHI. - void isolatePHI(MachineInstr*); - - /// Traverses a basic block, splitting any interferences found between - /// registers in the same congruence class. It takes two DenseMaps as - /// arguments that it also updates: CurrentDominatingParent, which maps - /// a color to the register in that congruence class whose definition was - /// most recently seen, and ImmediateDominatingParent, which maps a register - /// to the register in the same congruence class that most immediately - /// dominates it. 
- /// - /// This function assumes that it is being called in a depth-first traversal - /// of the dominator tree. - void SplitInterferencesForBasicBlock( - MachineBasicBlock&, - DenseMap<unsigned, unsigned> &CurrentDominatingParent, - DenseMap<unsigned, unsigned> &ImmediateDominatingParent); - - // Lowers a PHI instruction, inserting copies of the source and destination - // registers as necessary. - void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); - - // Merges the live interval of Reg into NewReg and renames Reg to NewReg - // everywhere that Reg appears. Requires Reg and NewReg to have non- - // overlapping lifetimes. - void MergeLIsAndRename(unsigned Reg, unsigned NewReg); - - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; - MachineDominatorTree *DT; - LiveIntervals *LI; - - BumpPtrAllocator Allocator; - - DenseMap<unsigned, Node*> RegNodeMap; - - // Maps a basic block to a list of its defs of registers that appear as PHI - // sources. - DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; - - // Maps a color to a pair of a MachineInstr* and a virtual register, which - // is the operand of that PHI corresponding to the current basic block. - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; - - // FIXME: Can these two data structures be combined? Would a std::multimap - // be any better? - - // Stores pairs of predecessor basic blocks and the source registers of - // inserted copy instructions. - typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; - SrcCopySet InsertedSrcCopySet; - - // Maps pairs of predecessor basic blocks and colors to their defining copy - // instructions. - typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> - SrcCopyMap; - SrcCopyMap InsertedSrcCopyMap; - - // Maps inserted destination copy registers to their defining copy - // instructions. 
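The Node struct above is a union-find forest with union by rank and path compression, with the two isolation flags packed into the spare bits of the parent PointerIntPair. A standalone version of the same machinery (a plain bool stands in for the stolen bit, and color 0 keeps its "isolated" meaning), preserving the left bias that unionRegs documents for the fresh-right-argument case; the remaining member declarations of the pass continue below.

    #include <cstdio>
    #include <vector>

    struct Node {
      unsigned parent;
      unsigned rank;
      bool isolated;     // the bit the pass steals from the parent pointer
    };

    struct Classes {
      std::vector<Node> nodes;
      unsigned add() {
        nodes.push_back(Node{(unsigned)nodes.size(), 0, false});
        return (unsigned)nodes.size() - 1;
      }
      unsigned leader(unsigned v) {
        while (nodes[v].parent != v) {
          nodes[v].parent = nodes[nodes[v].parent].parent; // path compression
          v = nodes[v].parent;
        }
        return v;
      }
      // On rank ties the right class joins the left, so a freshly added
      // right argument never displaces the left class's leader.
      void unionRegs(unsigned r1, unsigned r2) {
        unsigned n1 = leader(r1), n2 = leader(r2);
        if (n1 == n2) return;
        if (nodes[n1].rank > nodes[n2].rank)
          nodes[n2].parent = n1;
        else if (nodes[n1].rank < nodes[n2].rank)
          nodes[n1].parent = n2;
        else {
          nodes[n2].parent = n1;
          nodes[n1].rank++;
        }
      }
      // Color 0 means isolated, as in getRegColor.
      unsigned color(unsigned v) {
        return nodes[v].isolated ? 0 : leader(v) + 1;
      }
    };

    int main() {
      Classes C;
      unsigned a = C.add(), b = C.add(), c = C.add();
      C.unionRegs(a, b);
      C.unionRegs(a, c);
      std::printf("%u %u %u\n", C.color(a), C.color(b), C.color(c)); // 1 1 1
    }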
- typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; - DestCopyMap InsertedDestCopies; - }; - - struct MIIndexCompare { - MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } - - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); - } - - LiveIntervals *LI; - }; -} // namespace - -STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); -STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); -STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); - -char StrongPHIElimination::ID = 0; -INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false) - -char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; - -void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { - // FIXME: This only needs to check from the first terminator, as only the - // first terminator can use a virtual register. - for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { - assert (RI != MBB->rend()); - MachineInstr *MI = &*RI; - - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - MachineOperand &MO = *OI; - if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) - return &MO; - } - } -} - -bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { - MRI = &MF.getRegInfo(); - TII = MF.getTarget().getInstrInfo(); - DT = &getAnalysis<MachineDominatorTree>(); - LI = &getAnalysis<LiveIntervals>(); - - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - unsigned DestReg = BBI->getOperand(0).getReg(); - addReg(DestReg); - PHISrcDefs[I].push_back(BBI); - - for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { - MachineOperand &SrcMO = BBI->getOperand(i); - unsigned SrcReg = SrcMO.getReg(); - addReg(SrcReg); - unionRegs(DestReg, SrcReg); - - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI) - PHISrcDefs[DefMI->getParent()].push_back(DefMI); - } - } - } - - // Perform a depth-first traversal of the dominator tree, splitting - // interferences amongst PHI-congruence classes. - DenseMap<unsigned, unsigned> CurrentDominatingParent; - DenseMap<unsigned, unsigned> ImmediateDominatingParent; - for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) { - SplitInterferencesForBasicBlock(*DI->getBlock(), - CurrentDominatingParent, - ImmediateDominatingParent); - } - - // Insert copies for all PHI source and destination registers. 
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - InsertCopiesForPHI(BBI, I); - } - } - - // FIXME: Preserve the equivalence classes during copy insertion and use - // the preversed equivalence classes instead of recomputing them. - RegNodeMap.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - unsigned DestReg = BBI->getOperand(0).getReg(); - addReg(DestReg); - - for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { - unsigned SrcReg = BBI->getOperand(i).getReg(); - addReg(SrcReg); - unionRegs(DestReg, SrcReg); - } - } - } - - DenseMap<unsigned, unsigned> RegRenamingMap; - bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - while (BBI != BBE && BBI->isPHI()) { - MachineInstr *PHI = BBI; - - assert(PHI->getNumOperands() > 0); - - unsigned SrcReg = PHI->getOperand(1).getReg(); - unsigned SrcColor = getRegColor(SrcReg); - unsigned NewReg = RegRenamingMap[SrcColor]; - if (!NewReg) { - NewReg = SrcReg; - RegRenamingMap[SrcColor] = SrcReg; - } - MergeLIsAndRename(SrcReg, NewReg); - - unsigned DestReg = PHI->getOperand(0).getReg(); - if (!InsertedDestCopies.count(DestReg)) - MergeLIsAndRename(DestReg, NewReg); - - for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { - unsigned SrcReg = PHI->getOperand(i).getReg(); - MergeLIsAndRename(SrcReg, NewReg); - } - - ++BBI; - LI->RemoveMachineInstrFromMaps(PHI); - PHI->eraseFromParent(); - Changed = true; - } - } - - // Due to the insertion of copies to split live ranges, the live intervals are - // guaranteed to not overlap, except in one case: an original PHI source and a - // PHI destination copy. In this case, they have the same value and thus don't - // truly intersect, so we merge them into the value live at that point. - // FIXME: Is there some better way we can handle this? - for (DestCopyMap::iterator I = InsertedDestCopies.begin(), - E = InsertedDestCopies.end(); I != E; ++I) { - unsigned DestReg = I->first; - unsigned DestColor = getRegColor(DestReg); - unsigned NewReg = RegRenamingMap[DestColor]; - - LiveInterval &DestLI = LI->getInterval(DestReg); - LiveInterval &NewLI = LI->getInterval(NewReg); - - assert(DestLI.ranges.size() == 1 - && "PHI destination copy's live interval should be a single live " - "range from the beginning of the BB to the copy instruction."); - LiveRange *DestLR = DestLI.begin(); - VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); - if (!NewVNI) { - NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); - MachineInstr *CopyInstr = I->second; - CopyInstr->getOperand(1).setIsKill(true); - } - - LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); - NewLI.addRange(NewLR); - - LI->removeInterval(DestReg); - MRI->replaceRegWith(DestReg, NewReg); - } - - // Adjust the live intervals of all PHI source registers to handle the case - // where the PHIs in successor blocks were the only later uses of the source - // register. 
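The fix-up that the comment above introduces trims a source register's live interval when PHIs in successor blocks were its only later uses: if the value is not live into any successor, the range from the last real use to the end of the block is removed and that use becomes a kill. A toy version of the trim over half-open integer segments (invented names; SlotIndexes are plain ints here):

    #include <cstdio>
    #include <vector>

    struct Seg { int start, end; };            // half-open [start, end)

    // Drop the part of every segment that overlaps [from, to).
    void removeRange(std::vector<Seg> &LI, int from, int to) {
      std::vector<Seg> out;
      for (Seg s : LI) {
        if (s.end <= from || s.start >= to) { out.push_back(s); continue; }
        if (s.start < from) out.push_back({s.start, from});  // keep prefix
        if (s.end > to) out.push_back({to, s.end});          // keep suffix
      }
      LI = out;
    }

    int main() {
      std::vector<Seg> SrcLI = {{0, 40}};      // live through the block
      const int LastUseIndex = 24, MBBEndIdx = 40;
      bool liveIntoAnySuccessor = false;       // the successor loop above
      if (!liveIntoAnySuccessor) {
        removeRange(SrcLI, LastUseIndex, MBBEndIdx);
        // ...and the operand at LastUseIndex gets its kill flag set.
      }
      for (Seg s : SrcLI)
        std::printf("[%d,%d)\n", s.start, s.end); // now just [0,24)
    }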
- for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), - E = InsertedSrcCopySet.end(); I != E; ++I) { - MachineBasicBlock *MBB = I->first; - unsigned SrcReg = I->second; - if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) - SrcReg = RenamedRegister; - - LiveInterval &SrcLI = LI->getInterval(SrcReg); - - bool isLiveOut = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { - isLiveOut = true; - break; - } - } - - if (isLiveOut) - continue; - - MachineOperand *LastUse = findLastUse(MBB, SrcReg); - assert(LastUse); - SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); - LastUse->setIsKill(true); - } - - Allocator.Reset(); - RegNodeMap.clear(); - PHISrcDefs.clear(); - InsertedSrcCopySet.clear(); - InsertedSrcCopyMap.clear(); - InsertedDestCopies.clear(); - - return Changed; -} - -void StrongPHIElimination::addReg(unsigned Reg) { - Node *&N = RegNodeMap[Reg]; - if (!N) - N = new (Allocator) Node(Reg); -} - -StrongPHIElimination::Node* -StrongPHIElimination::Node::getLeader() { - Node *N = this; - Node *Parent = parent.getPointer(); - Node *Grandparent = Parent->parent.getPointer(); - - while (Parent != Grandparent) { - N->parent.setPointer(Grandparent); - N = Grandparent; - Parent = Parent->parent.getPointer(); - Grandparent = Parent->parent.getPointer(); - } - - return Parent; -} - -unsigned StrongPHIElimination::getRegColor(unsigned Reg) { - DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); - if (RI == RegNodeMap.end()) - return 0; - Node *Node = RI->second; - if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) - return 0; - return Node->getLeader()->value; -} - -void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { - Node *Node1 = RegNodeMap[Reg1]->getLeader(); - Node *Node2 = RegNodeMap[Reg2]->getLeader(); - - if (Node1->rank > Node2->rank) { - Node2->parent.setPointer(Node1->getLeader()); - } else if (Node1->rank < Node2->rank) { - Node1->parent.setPointer(Node2->getLeader()); - } else if (Node1 != Node2) { - Node2->parent.setPointer(Node1->getLeader()); - Node1->rank++; - } -} - -void StrongPHIElimination::isolateReg(unsigned Reg) { - Node *Node = RegNodeMap[Reg]; - Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); -} - -unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { - assert(PHI->isPHI()); - - unsigned DestReg = PHI->getOperand(0).getReg(); - Node *DestNode = RegNodeMap[DestReg]; - if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) - return 0; - - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); - if (SrcColor) - return SrcColor; - } - return 0; -} - -void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { - assert(PHI->isPHI()); - Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; - Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); -} - -/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any -/// interferences found between registers in the same congruence class. It -/// takes two DenseMaps as arguments that it also updates: -/// -/// 1) CurrentDominatingParent, which maps a color to the register in that -/// congruence class whose definition was most recently seen. 
-/// -/// 2) ImmediateDominatingParent, which maps a register to the register in the -/// same congruence class that most immediately dominates it. -/// -/// This function assumes that it is being called in a depth-first traversal -/// of the dominator tree. -/// -/// The algorithm used here is a generalization of the dominance-based SSA test -/// for two variables. If there are variables a_1, ..., a_n such that -/// -/// def(a_1) dom ... dom def(a_n), -/// -/// then we can test for an interference between any two a_i by only using O(n) -/// interference tests between pairs of variables. If i < j and a_i and a_j -/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). -/// Thus, in order to test for an interference involving a_i, we need only check -/// for a potential interference with a_i+1. -/// -/// This method can be generalized to arbitrary sets of variables by performing -/// a depth-first traversal of the dominator tree. As we traverse down a branch -/// of the dominator tree, we keep track of the current dominating variable and -/// only perform an interference test with that variable. However, when we go to -/// another branch of the dominator tree, the definition of the current dominating -/// variable may no longer dominate the current block. In order to correct this, -/// we need to use a stack of past choices of the current dominating variable -/// and pop from this stack until we find a variable whose definition actually -/// dominates the current block. -/// -/// There will be one push on this stack for each variable that has become the -/// current dominating variable, so instead of using an explicit stack we can -/// simply associate the previous choice for a current dominating variable with -/// the new choice. This works better in our implementation, where we test for -/// interference in multiple distinct sets at once. -void -StrongPHIElimination::SplitInterferencesForBasicBlock( - MachineBasicBlock &MBB, - DenseMap<unsigned, unsigned> &CurrentDominatingParent, - DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { - // Sort defs by their order in the original basic block, as the code below - // assumes that it is processing definitions in dominance order. - std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; - std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); - - for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), - BBE = DefInstrs.end(); BBI != BBE; ++BBI) { - for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), - E = (*BBI)->operands_end(); I != E; ++I) { - const MachineOperand &MO = *I; - - // FIXME: This would be faster if it were possible to bail out of checking - // an instruction's operands after the explicit defs, but this is incorrect - // for variadic instructions, which may appear before register allocation - // in the future. - if (!MO.isReg() || !MO.isDef()) - continue; - - unsigned DestReg = MO.getReg(); - if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) - continue; - - // If the virtual register being defined is not used in any PHI or has - // already been isolated, then there are no more interferences to check. - unsigned DestColor = getRegColor(DestReg); - if (!DestColor) - continue; - - // The input to this pass sometimes is not in SSA form in every basic - // block, as some virtual registers have redefinitions. 
We could eliminate - // this by fixing the passes that generate the non-SSA code, or we could - // handle it here by tracking defining machine instructions rather than - // virtual registers. For now, we just handle the situation conservatively - // in a way that will possibly lead to false interferences. - unsigned &CurrentParent = CurrentDominatingParent[DestColor]; - unsigned NewParent = CurrentParent; - if (NewParent == DestReg) - continue; - - // Pop registers from the stack represented by ImmediateDominatingParent - // until we find a parent that dominates the current instruction. - while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) - || !getRegColor(NewParent))) - NewParent = ImmediateDominatingParent[NewParent]; - - // If NewParent is nonzero, then its definition dominates the current - // instruction, so it is only necessary to check for the liveness of - // NewParent in order to check for an interference. - if (NewParent - && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { - // If there is an interference, always isolate the new register. This - // could be improved by using a heuristic that decides which of the two - // registers to isolate. - isolateReg(DestReg); - CurrentParent = NewParent; - } else { - // If there is no interference, update ImmediateDominatingParent and set - // the CurrentDominatingParent for this color to the current register. - ImmediateDominatingParent[DestReg] = NewParent; - CurrentParent = DestReg; - } - } - } - - // We now walk the PHIs in successor blocks and check for interferences. This - // is necessary because the use of a PHI's operands are logically contained in - // the predecessor block. The def of a PHI's destination register is processed - // along with the other defs in a basic block. - - CurrentPHIForColor.clear(); - - for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - MachineInstr *PHI = BBI; - - // If a PHI is already isolated, either by being isolated directly or - // having all of its operands isolated, ignore it. - unsigned Color = getPHIColor(PHI); - if (!Color) - continue; - - // Find the index of the PHI operand that corresponds to this basic block. - unsigned PredIndex; - for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { - if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) - break; - } - assert(PredIndex < PHI->getNumOperands()); - unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); - - // Pop registers from the stack represented by ImmediateDominatingParent - // until we find a parent that dominates the current instruction. - unsigned &CurrentParent = CurrentDominatingParent[Color]; - unsigned NewParent = CurrentParent; - while (NewParent - && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) - || !getRegColor(NewParent))) - NewParent = ImmediateDominatingParent[NewParent]; - CurrentParent = NewParent; - - // If there is an interference with a register, always isolate the - // register rather than the PHI. It is also possible to isolate the - // PHI, but that introduces copies for all of the registers involved - // in that PHI. 
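[Editor's aside: both interference loops in SplitInterferencesForBasicBlock, the one above for ordinary defs and the one below for PHI operands in successors, share the pop-until-dominating walk described in the function's doc comment. A stripped-down sketch of that walk, with std::function callbacks standing in for the dominator-tree and liveness queries; all names are hypothetical, and the real pass additionally skips parents that have already been isolated (the !getRegColor(NewParent) test), which this sketch omits:]

#include <functional>
#include <unordered_map>

// One "current dominating parent" per color, plus a per-register link that
// encodes the stack of previous choices. Defs must be visited in dominance
// order; register 0 means "no parent".
struct DomForestWalker {
  std::unordered_map<unsigned, unsigned> CurrentParent;   // color -> reg
  std::unordered_map<unsigned, unsigned> ImmediateParent; // reg -> prev reg
  std::function<bool(unsigned Def, unsigned At)> dominates; // dom-tree query
  std::function<bool(unsigned Reg, unsigned At)> liveAt;    // liveness query

  // Returns true if Reg interferes with an earlier member of its class, in
  // which case the caller would isolate Reg (cf. isolateReg above).
  bool visitDef(unsigned Reg, unsigned Color) {
    unsigned &Cur = CurrentParent[Color];
    unsigned Parent = Cur;
    // Pop entries whose definitions no longer dominate this def: we have
    // crossed to another branch of the dominator tree.
    while (Parent && !dominates(Parent, Reg))
      Parent = ImmediateParent[Parent];
    if (Parent && liveAt(Parent, Reg)) {
      Cur = Parent; // interference found; Reg does not join the stack
      return true;
    }
    ImmediateParent[Reg] = Parent; // push: remember the previous choice
    Cur = Reg;
    return false;
  }
};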
- if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) - && NewParent != PredOperandReg) - isolateReg(NewParent); - - std::pair<MachineInstr*, unsigned> - &CurrentPHI = CurrentPHIForColor[Color]; - - // If two PHIs have the same operand from every shared predecessor, then - // they don't actually interfere. Otherwise, isolate the current PHI. This - // could possibly be improved, e.g. we could isolate the PHI with the - // fewest operands. - if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) - isolatePHI(PHI); - else - CurrentPHI = std::make_pair(PHI, PredOperandReg); - } - } -} - -void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, - MachineBasicBlock *MBB) { - assert(PHI->isPHI()); - ++NumPHIsLowered; - unsigned PHIColor = getPHIColor(PHI); - - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - MachineOperand &SrcMO = PHI->getOperand(i); - - // If a source is defined by an implicit def, there is no need to insert a - // copy in the predecessor. - if (SrcMO.isUndef()) - continue; - - unsigned SrcReg = SrcMO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && - "Machine PHI Operands must all be virtual registers!"); - - MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); - unsigned SrcColor = getRegColor(SrcReg); - - // If neither the PHI nor the operand were isolated, then we only need to - // set the phi-kill flag on the VNInfo at this PHI. - if (PHIColor && SrcColor == PHIColor) { - LiveInterval &SrcInterval = LI->getInterval(SrcReg); - SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); - VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); - (void)SrcVNI; - assert(SrcVNI); - continue; - } - - unsigned CopyReg = 0; - if (PHIColor) { - SrcCopyMap::const_iterator I - = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); - CopyReg - = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; - } - - if (!CopyReg) { - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - CopyReg = MRI->createVirtualRegister(RC); - - MachineBasicBlock::iterator - CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); - unsigned SrcSubReg = SrcMO.getSubReg(); - MachineInstr *CopyInstr = BuildMI(*PredBB, - CopyInsertPoint, - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - CopyReg).addReg(SrcReg, 0, SrcSubReg); - LI->InsertMachineInstrInMaps(CopyInstr); - ++NumSrcCopiesInserted; - - // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for - // the newly added range. - LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); - InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); - - addReg(CopyReg); - if (PHIColor) { - unionRegs(PHIColor, CopyReg); - assert(getRegColor(CopyReg) != CopyReg); - } else { - PHIColor = CopyReg; - assert(getRegColor(CopyReg) == CopyReg); - } - - // Insert into map if not already there. - InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), - CopyInstr)); - } - - SrcMO.setReg(CopyReg); - - // If SrcReg is not live beyond the PHI, trim its interval so that it is no - // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are - // processed later, but this is still correct to do at this point because we - // never rely on LiveIntervals being correct while inserting copies. - // FIXME: Should this just count uses at PHIs like the normal PHIElimination - // pass does? 
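[Editor's aside: one detail of InsertCopiesForPHI above that is easy to miss is that source copies are cached per (predecessor, PHI color) pair in InsertedSrcCopyMap, so several PHIs drawing from the same congruence class share a single copy in each predecessor. A toy sketch of that reuse; Block, the register strings, and the append-at-end placement are all illustrative, whereas the real code places the copy before the terminator via findPHICopyInsertPoint:]

#include <map>
#include <string>
#include <utility>
#include <vector>

struct Block { std::string Name; std::vector<std::string> Code; };

// Insert at most one source copy per (predecessor, color) pair and hand
// back the copy's destination so later PHIs can reuse it.
std::string getOrInsertSrcCopy(
    std::map<std::pair<Block *, unsigned>, std::string> &InsertedSrcCopies,
    Block &Pred, unsigned PhiColor, const std::string &SrcReg) {
  auto Key = std::make_pair(&Pred, PhiColor);
  auto It = InsertedSrcCopies.find(Key);
  if (It != InsertedSrcCopies.end())
    return It->second; // an earlier PHI already paid for this copy
  std::string CopyReg = "copy." + SrcReg;
  Pred.Code.push_back(CopyReg + " = COPY " + SrcReg);
  InsertedSrcCopies[Key] = CopyReg;
  return CopyReg;
}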
- LiveInterval &SrcLI = LI->getInterval(SrcReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); - if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) - SrcLI.removeRange(MBBStartIndex, PHIIndex, true); - } - - unsigned DestReg = PHI->getOperand(0).getReg(); - unsigned DestColor = getRegColor(DestReg); - - if (PHIColor && DestColor == PHIColor) { - LiveInterval &DestLI = LI->getInterval(DestReg); - - // Set the phi-def flag for the VN at this PHI. - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); - assert(DestVNI); - - // Prior to PHI elimination, the live ranges of PHIs begin at their defining - // instruction. After PHI elimination, PHI instructions are replaced by VNs - // with the phi-def flag set, and the live ranges of these VNs start at the - // beginning of the basic block. - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - DestVNI->def = MBBStartIndex; - DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getRegSlot(), - DestVNI)); - return; - } - - const TargetRegisterClass *RC = MRI->getRegClass(DestReg); - unsigned CopyReg = MRI->createVirtualRegister(RC); - - MachineInstr *CopyInstr = BuildMI(*MBB, - MBB->SkipPHIsAndLabels(MBB->begin()), - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - DestReg).addReg(CopyReg); - LI->InsertMachineInstrInMaps(CopyInstr); - PHI->getOperand(0).setReg(CopyReg); - ++NumDestCopiesInserted; - - // Add the region from the beginning of MBB to the copy instruction to - // CopyReg's live interval, and give the VNInfo the phidef flag. - LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); - VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, - LI->getVNInfoAllocator()); - CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getRegSlot(), - CopyVNI)); - - // Adjust DestReg's live interval to adjust for its new definition at - // CopyInstr. - LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); - assert(DestVNI); - DestVNI->def = DestCopyIndex.getRegSlot(); - - InsertedDestCopies[CopyReg] = CopyInstr; -} - -void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { - if (Reg == NewReg) - return; - - LiveInterval &OldLI = LI->getInterval(Reg); - LiveInterval &NewLI = LI->getInterval(NewReg); - - // Merge the live ranges of the two registers. - DenseMap<VNInfo*, VNInfo*> VNMap; - for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); - LRI != LRE; ++LRI) { - LiveRange OldLR = *LRI; - VNInfo *OldVN = OldLR.valno; - - VNInfo *&NewVN = VNMap[OldVN]; - if (!NewVN) { - NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); - VNMap[OldVN] = NewVN; - } - - LiveRange LR(OldLR.start, OldLR.end, NewVN); - NewLI.addRange(LR); - } - - // Remove the LiveInterval for the register being renamed and replace all - // of its defs and uses with the new register. 
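[Editor's aside: MergeLIsAndRename above must clone each distinct VNInfo exactly once while moving ranges, so ranges that shared a value number in the old interval still share one in the new interval. The same bookkeeping in miniature, with an illustrative Segment type in place of LiveRange/VNInfo:]

#include <map>
#include <vector>

// A half-open range tagged with a value-number id (illustrative).
struct Segment { unsigned Start, End, ValNo; };

// Move Old's segments into New, cloning each old value id exactly once so
// segments that shared a value keep sharing the clone.
void mergeInto(std::vector<Segment> &New, const std::vector<Segment> &Old,
               unsigned &NextValNo) {
  std::map<unsigned, unsigned> VNMap; // old value id -> cloned value id
  for (const Segment &S : Old) {
    auto It = VNMap.find(S.ValNo);
    if (It == VNMap.end())
      It = VNMap.insert({S.ValNo, NextValNo++}).first;
    New.push_back(Segment{S.Start, S.End, It->second});
  }
}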
- LI->removeInterval(Reg); - MRI->replaceRegWith(Reg, NewReg); -} diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 8a1d567..ff0181e 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -638,8 +638,6 @@ bothUsedInPHI(const MachineBasicBlock &A, bool TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end()); - for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), PE = BB.pred_end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index bb8bd42..bf4fd65 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -276,6 +277,36 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, return false; } +bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, + unsigned SubIdx, unsigned &Size, + unsigned &Offset, + const TargetMachine *TM) const { + if (!SubIdx) { + Size = RC->getSize(); + Offset = 0; + return true; + } + unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx); + // Convert bit size to byte size to be consistent with + // MCRegisterClass::getSize(). + if (BitSize % 8) + return false; + + int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + if (BitOffset < 0 || BitOffset % 8) + return false; + + Size = BitSize /= 8; + Offset = (unsigned)BitOffset / 8; + + assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + + if (!TM->getDataLayout()->isLittleEndian()) { + Offset = RC->getSize() - (Offset + Size); + } + return true; +} + void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, @@ -364,6 +395,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, // Ask the target to do the actual folding. if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) && @@ -424,9 +456,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, NewMI = MBB.insert(MI, NewMI); // Copy the memoperands from the load to the folded instruction. - NewMI->setMemRefs(LoadMI->memoperands_begin(), - LoadMI->memoperands_end()); - + if (MI->memoperands_empty()) { + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + } + else { + // Handle the rare case of folding multiple loads. 
+ NewMI->setMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(), + E = LoadMI->memoperands_end(); I != E; ++I) { + NewMI->addMemOperand(MF, *I); + } + } return NewMI; } @@ -630,6 +672,10 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, return 1; } +unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const { + return 0; +} + unsigned TargetInstrInfo:: getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 8d8f81b..30305af 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; Names[RTLIB::FLOOR_F32] = "floorf"; Names[RTLIB::FLOOR_F64] = "floor"; Names[RTLIB::FLOOR_F80] = "floorl"; @@ -313,34 +318,62 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16"; Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16"; Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16"; Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16"; Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16"; Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; Names[RTLIB::SYNC_FETCH_AND_XOR_8] = 
"__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16"; Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16"; + Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1"; + Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2"; + Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4"; + Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8"; + Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16"; + Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1"; + Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2"; + Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4"; + Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8"; + Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { Names[RTLIB::SINCOS_F32] = "sincosf"; @@ -356,6 +389,13 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SINCOS_F128] = 0; Names[RTLIB::SINCOS_PPCF128] = 0; } + + if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) { + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; + } else { + // These are generally not available. + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -624,7 +664,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, // Perform these initializations only once. IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -682,6 +721,14 @@ void TargetLoweringBase::initActions() { // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand); + + // These operations default to expand for vector types. + if (VT >= MVT::FIRST_VECTOR_VALUETYPE && + VT <= MVT::LAST_VECTOR_VALUETYPE) + setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. 
@@ -747,6 +794,19 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); } +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { + return MVT::getIntegerVT(getPointerSizeInBits(AS)); +} + +unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { + return TD->getPointerSizeInBits(AS); +} + +unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { + assert(Ty->isPointerTy()); + return getPointerSizeInBits(Ty->getPointerAddressSpace()); +} + MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { return MVT::getIntegerVT(8*TD->getPointerSize(0)); } @@ -1162,7 +1222,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); + Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0)); } } @@ -1228,6 +1288,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case PtrToInt: return ISD::BITCAST; case IntToPtr: return ISD::BITCAST; case BitCast: return ISD::BITCAST; + case AddrSpaceCast: return ISD::ADDRSPACECAST; case ICmp: return ISD::SETCC; case FCmp: return ISD::SETCC; case PHI: return 0; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 07cf871..59d7b57 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -52,10 +52,10 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, default: report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: - return Mang->getSymbol(GV); + return getSymbol(*Mang, GV); case dwarf::DW_EH_PE_pcrel: { return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - Mang->getSymbol(GV)->getName()); + getSymbol(*Mang, GV)->getName()); } } } @@ -104,7 +104,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -252,7 +252,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Prefix = getSectionPrefixForGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); Name.append(Sym->getName().begin(), Sym->getName().end()); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); @@ -523,6 +523,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -575,10 +580,6 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isBSSLocal()) return DataBSSSection; - // Handle thread local data. - if (Kind.isThreadBSS()) return TLSBSSSection; - if (Kind.isThreadData()) return TLSDataSection; - // Otherwise, just drop the variable in the normal data section. 
return DataSection; } @@ -613,7 +614,7 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { // FIXME: ObjC metadata is currently emitted as internal symbols that have // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and // this horrible hack can go away. - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') return false; } @@ -642,7 +643,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -671,7 +672,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); + MCSymbol *Sym = getSymbol(*Mang, GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -732,6 +733,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return getContext().getCOFFSection(Name, Characteristics, Kind, + "", Selection); } @@ -767,16 +769,22 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - Kind, COFF::IMAGE_COMDAT_SELECT_ANY); + Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY); } if (Kind.isText()) - return getTextSection(); + return TextSection; if (Kind.isThreadLocal()) - return getTLSDataSection(); + return TLSDataSection; - return getDataSection(); + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isBSS()) + return BSSSection; + + return DataSection; } void TargetLoweringObjectFileCOFF:: diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index 7a39a4c..f7bf86b 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -22,10 +22,8 @@ using namespace llvm; bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers and then // check to see if we should eliminate all frame pointers. - bool NoFramePointerElimNonLeaf = - MF.getFunction()->getFnAttribute("no-frame-pointer-elim-non-leaf") - .getValueAsString() == "true"; - if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") && + !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index ffcee1f..5a15243 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -73,6 +73,14 @@ void PrintRegUnit::print(raw_ostream &OS) const { OS << '~' << TRI->getName(*Roots); } +void PrintVRegOrUnit::print(raw_ostream &OS) const { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + return; + } + PrintRegUnit::print(OS); +} + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. 
const TargetRegisterClass * diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 64ee9d1..b0f2ca6 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -210,7 +210,8 @@ unsigned TargetSchedModel::computeOperandLatency( // unit latency (defaultDefLatency may be too conservative). #ifndef NDEBUG if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() - && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) { + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() + && SchedModel.isComplete()) { std::string Err; raw_string_ostream ss(Err); ss << "DefIdx " << DefIdx << " exceeds machine model writes for " @@ -224,10 +225,13 @@ unsigned TargetSchedModel::computeOperandLatency( return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } -unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { +unsigned +TargetSchedModel::computeInstrLatency(const MachineInstr *MI, + bool UseDefaultDefLatency) const { // For the itinerary model, fall back to the old subtarget hook. // Allow subtargets to compute Bundle latencies outside the machine model. - if (hasInstrItineraries() || MI->isBundle()) + if (hasInstrItineraries() || MI->isBundle() || + (!hasInstrSchedModel() && !UseDefaultDefLatency)) return TII->getInstrLatency(&InstrItins, MI); if (hasInstrSchedModel()) { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index c52e675..b9a6b47 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1400,7 +1400,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); - LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI)); + LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); } } @@ -1457,7 +1457,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); if (I->end == UseIdx) - LI.removeRange(LastCopyIdx, UseIdx); + LI.removeSegment(LastCopyIdx, UseIdx); } } else if (RemovedKillFlag) { diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index a95ebcd..f735ef2 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -50,7 +49,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); - AU.addPreserved<ProfileInfo>(); } }; } @@ -87,9 +85,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { } // Actually remove the blocks now. 
-  ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
   for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
-    if (PI) PI->removeBlock(DeadBlocks[i]);
     DeadBlocks[i]->eraseFromParent();
   }
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index cd012d2..e0aa405 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -264,15 +265,36 @@ void VirtRegRewriter::rewrite() {
   SmallVector<unsigned, 8> SuperDeads;
   SmallVector<unsigned, 8> SuperDefs;
   SmallVector<unsigned, 8> SuperKills;
+  SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;

   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     DEBUG(MBBI->print(dbgs(), Indexes));
+    bool IsExitBB = MBBI->succ_empty();
     for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(),
          MIE = MBBI->instr_end(); MII != MIE;) {
       MachineInstr *MI = MII;
       ++MII;

+      // Check if this instruction is a call to a noreturn function.
+      // If so, all the definitions set by this instruction can be ignored.
+      if (IsExitBB && MI->isCall())
+        for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+             MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+          MachineOperand &MO = *MOI;
+          if (!MO.isGlobal())
+            continue;
+          const Function *Func = dyn_cast<Function>(MO.getGlobal());
+          if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
+              // We need to keep correct unwind information
+              // even if the function will not return, since the
+              // runtime may need it.
+              !Func->hasFnAttribute(Attribute::NoUnwind))
+            continue;
+          NoReturnInsts.insert(MI);
+          break;
+        }
+
       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
            MOE = MI->operands_end(); MOI != MOE; ++MOI) {
         MachineOperand &MO = *MOI;
@@ -353,7 +375,25 @@ void VirtRegRewriter::rewrite() {
   }

   // Tell MRI about physical registers in use.
-  for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
-    if (!MRI->reg_nodbg_empty(Reg))
-      MRI->setPhysRegUsed(Reg);
+  if (NoReturnInsts.empty()) {
+    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+      if (!MRI->reg_nodbg_empty(Reg))
+        MRI->setPhysRegUsed(Reg);
+  } else {
+    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) {
+      if (MRI->reg_nodbg_empty(Reg))
+        continue;
+      // Check if this register has a use that will impact the rest of the
+      // code. Uses in debug and noreturn instructions do not impact the
+      // generated code.
+      for (MachineRegisterInfo::reg_nodbg_iterator It =
+             MRI->reg_nodbg_begin(Reg),
+           EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) {
+        if (!NoReturnInsts.count(&(*It))) {
+          MRI->setPhysRegUsed(Reg);
+          break;
+        }
+      }
+    }
+  }
 }
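[Editor's aside: the operand scan in the VirtRegMap hunk above boils down to one predicate on the callee: a call in an exit block may be ignored for physical-register-usage purposes only when the function neither returns nor unwinds, since unwinding still needs correct register information at run time. Restated as a standalone helper against the same LLVM API the hunk uses:]

#include "llvm/IR/Function.h"

using namespace llvm;

// True if definitions made by a call to F in a block with no successors
// can be disregarded when recording physical register usage.
static bool callCanBeIgnored(const Function &F) {
  return F.hasFnAttribute(Attribute::NoReturn) &&
         F.hasFnAttribute(Attribute::NoUnwind);
}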