author | Stephen Hines <srhines@google.com> | 2014-04-23 16:57:46 -0700
committer | Stephen Hines <srhines@google.com> | 2014-04-24 15:53:16 -0700
commit | 36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree | e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/CodeGen
parent | 69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/CodeGen')
160 files changed, 13829 insertions, 6407 deletions
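Much of the AsmPrinter churn in the diff below replaces the dedicated DwarfDebug/DwarfException members with a generic list of AsmPrinterHandler objects that the printer invokes at module, function, and instruction boundaries (see the new AsmPrinter/AsmPrinterHandler.h further down). As a rough, hypothetical illustration of that interface only — the class below is not part of this patch, and it assumes LLVM 3.5-era headers — a handler simply overrides the six pure-virtual callbacks:

```cpp
// Hypothetical, minimal AsmPrinterHandler implementation (illustrative only;
// everything except the overridden interface is invented). It counts the
// machine instructions seen per function and prints the total.
#include "AsmPrinterHandler.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {

class InstrCountHandler : public AsmPrinterHandler {
  unsigned Count;

public:
  InstrCountHandler() : Count(0) {}

  // This handler has no use for symbol sizes.
  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}

  void beginFunction(const MachineFunction *MF) override { Count = 0; }

  void beginInstruction(const MachineInstr *MI) override { ++Count; }
  void endInstruction() override {}

  void endFunction(const MachineFunction *MF) override {
    errs() << MF->getName() << ": " << Count << " instructions\n";
  }

  // Nothing to flush after the module body.
  void endModule() override {}
};

} // end namespace llvm
```

Handlers of this shape are what AsmPrinter::doInitialization now pushes onto its Handlers list via HandlerInfo (together with a timer name and group), and each begin/end callback is wrapped in a NamedRegionTimer when pass timing is enabled, as the AsmPrinter.cpp hunks below show.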
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 2ee7767..25c438c 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -403,8 +403,18 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + // We need to be careful here not to define already-live super registers. + // If the super register is already live, then this definition is not + // a definition of the whole super register (just a partial insertion + // into it). Earlier subregister definitions (which we've not yet visited + // because we're iterating bottom-up) need to be linked to the same group + // as this definition. + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) + continue; + DefIndices[*AI] = Count; + } } } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 6683630..29b6a10 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -136,7 +136,7 @@ class RegisterClassInfo; ~AggressiveAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. - void StartBlock(MachineBasicBlock *BB); + void StartBlock(MachineBasicBlock *BB) override; /// BreakAntiDependencies - Identifiy anti-dependencies along the critical /// path @@ -146,15 +146,16 @@ class RegisterClassInfo; MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, - DbgValueVector &DbgValues); + DbgValueVector &DbgValues) override; /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// - void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex); + void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) override; /// Finish - Finish anti-dep breaking for a basic block. - void FinishBlock(); + void FinishBlock() override; private: /// Keep track of a position in the allocation order for each regclass. diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index aed461a..64ff2a7 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -45,10 +45,12 @@ public: /// Return the next physical register in the allocation order, or 0. /// It is safe to call next() again after it returned 0, it will keep /// returning 0 until rewind() is called. - unsigned next() { + unsigned next(unsigned Limit = 0) { if (Pos < 0) return Hints.end()[Pos++]; - while (Pos < int(Order.size())) { + if (!Limit) + Limit = Order.size(); + while (Pos < int(Limit)) { unsigned Reg = Order[Pos++]; if (!isHint(Reg)) return Reg; diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 1600c67..6ac5de2 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -498,8 +498,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // chain interposes between I and the return. if (I->mayHaveSideEffects() || I->mayReadFromMemory() || !isSafeToSpeculativelyExecute(I)) - for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; - --BBI) { + for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) { if (&*BBI == I) break; // Debug info intrinsics do not get in the way of tail call optimization. 
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk index be0c6e2..26f04d0 100644 --- a/lib/CodeGen/Android.mk +++ b/lib/CodeGen/Android.mk @@ -9,6 +9,7 @@ codegen_SRC_FILES := \ CalcSpillWeights.cpp \ CallingConvLower.cpp \ CodeGen.cpp \ + CodeGenPrepare.cpp \ CriticalAntiDepBreaker.cpp \ DeadMachineInstructionElim.cpp \ DFAPacketizer.cpp \ @@ -33,10 +34,10 @@ codegen_SRC_FILES := \ LiveIntervalAnalysis.cpp \ LiveInterval.cpp \ LiveIntervalUnion.cpp \ + LivePhysRegs.cpp \ LiveRangeCalc.cpp \ LiveRangeEdit.cpp \ LiveRegMatrix.cpp \ - LiveRegUnits.cpp \ LiveStackAnalysis.cpp \ LiveVariables.cpp \ LLVMTargetMachine.cpp \ @@ -97,6 +98,7 @@ codegen_SRC_FILES := \ SpillPlacement.cpp \ SplitKit.cpp \ StackColoring.cpp \ + StackMapLivenessAnalysis.cpp \ StackMaps.cpp \ StackProtector.cpp \ StackSlotColoring.cpp \ @@ -127,6 +129,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) # For the device # ===================================================== +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) include $(CLEAR_VARS) LOCAL_SRC_FILES := $(codegen_SRC_FILES) @@ -137,3 +140,4 @@ LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) include $(LLVM_GEN_INTRINSICS_MK) include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 5d82dd9..403feb4 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -30,43 +31,52 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> -EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, - cl::desc("Generate ARM EHABI tables with unwinding descriptors"), - cl::init(false)); - - ARMException::ARMException(AsmPrinter *A) - : DwarfException(A) {} + : DwarfException(A), + shouldEmitCFI(false) {} ARMException::~ARMException() {} ARMTargetStreamer &ARMException::getTargetStreamer() { - MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer(); + MCTargetStreamer &TS = *Asm->OutStreamer.getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } -void ARMException::EndModule() { +/// endModule - Emit all exception information that should come after the +/// content. +void ARMException::endModule() { + if (shouldEmitCFI) + Asm->OutStreamer.EmitCFISections(false, true); } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void ARMException::BeginFunction(const MachineFunction *MF) { +void ARMException::beginFunction(const MachineFunction *MF) { getTargetStreamer().emitFnStart(); if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); + // See if we need call frame info. 
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + assert(MoveType != AsmPrinter::CFI_M_EH && + "non-EH CFI not yet supported in prologue with EHABI lowering"); + if (MoveType == AsmPrinter::CFI_M_Debug) { + shouldEmitCFI = true; + Asm->OutStreamer.EmitCFIStartProc(false); + } } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. /// -void ARMException::EndFunction() { +void ARMException::endFunction(const MachineFunction *) { + if (shouldEmitCFI) + Asm->OutStreamer.EmitCFIEndProc(); + ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry()) ATS.emitCantUnwind(); @@ -74,25 +84,23 @@ void ARMException::EndFunction() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); - if (EnableARMEHABIDescriptors) { - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - if (!MMI->getLandingPads().empty()) { - // Emit references to personality. - if (const Function * Personality = - MMI->getPersonalities()[MMI->getPersonalityIndex()]) { - MCSymbol *PerSym = Asm->getSymbol(Personality); - Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); - ATS.emitPersonality(PerSym); - } + if (!MMI->getLandingPads().empty()) { + // Emit references to personality. + if (const Function * Personality = + MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + MCSymbol *PerSym = Asm->getSymbol(Personality); + Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + ATS.emitPersonality(PerSym); + } - // Emit .handlerdata directive. - ATS.emitHandlerData(); + // Emit .handlerdata directive. 
+ ATS.emitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); - } + // Emit actual exception table + EmitExceptionTable(); } } diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk index b2cc47e..a725fba 100644 --- a/lib/CodeGen/AsmPrinter/Android.mk +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -16,12 +16,13 @@ LOCAL_SRC_FILES := \ DIEHash.cpp \ DwarfAccelTable.cpp \ DwarfCFIException.cpp \ - DwarfCompileUnit.cpp \ DwarfDebug.cpp \ DwarfException.cpp \ + DwarfUnit.cpp \ ErlangGCPrinter.cpp \ OcamlGCPrinter.cpp \ - Win64Exception.cpp + Win64Exception.cpp \ + WinCodeViewLineTables.cpp LOCAL_MODULE:= libLLVMAsmPrinter @@ -33,6 +34,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) # For the device # ===================================================== +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) include $(CLEAR_VARS) LOCAL_SRC_FILES := \ @@ -44,12 +46,13 @@ LOCAL_SRC_FILES := \ DIEHash.cpp \ DwarfAccelTable.cpp \ DwarfCFIException.cpp \ - DwarfCompileUnit.cpp \ DwarfDebug.cpp \ DwarfException.cpp \ + DwarfUnit.cpp \ ErlangGCPrinter.cpp \ + OcamlGCPrinter.cpp \ Win64Exception.cpp \ - $(LOCAL_SRC_FILES) + WinCodeViewLineTables.cpp LOCAL_MODULE:= libLLVMAsmPrinter @@ -58,3 +61,4 @@ LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) include $(LLVM_GEN_INTRINSICS_MK) include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 308b0e0..c3afc8b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -15,19 +15,21 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "WinCodeViewLineTables.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" @@ -41,19 +43,20 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; static const char *const DWARFGroupName = "DWARF Emission"; -static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const DbgTimerName = "Debug Info Emission"; static const char *const EHTimerName = "DWARF Exception Writer"; +static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -99,14 +102,14 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - 
DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; + DD = 0; MMI = 0; LI = 0; MF = 0; CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { - assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); + assert(DD == 0 && Handlers.empty() && "Debug/EH info didn't get finalized"); if (GCMetadataPrinters != 0) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); @@ -135,6 +138,14 @@ const DataLayout &AsmPrinter::getDataLayout() const { return *TM.getDataLayout(); } +const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { + return TM.getSubtarget<MCSubtargetInfo>(); +} + +void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { + S.EmitInstruction(Inst, getSubtargetInfo()); +} + StringRef AsmPrinter::getTargetTriple() const { return TM.getTargetTriple(); } @@ -163,9 +174,28 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - OutStreamer.InitStreamer(); + OutStreamer.InitSections(); - Mang = new Mangler(&TM); + Mang = new Mangler(TM.getDataLayout()); + + // Emit the version-min deplyment target directive if needed. + // + // FIXME: If we end up with a collection of these sorts of Darwin-specific + // or ELF-specific things, it may make sense to have a platform helper class + // that will work with the target helper class. For now keep it here, as the + // alternative is duplicated code in each of the target asm printers that + // use the directive, where it would need the same conditionalization + // anyway. + Triple TT(getTargetTriple()); + if (TT.isOSDarwin()) { + unsigned Major, Minor, Update; + TT.getOSVersion(Major, Minor, Update); + // If there is a version specified, Major will be non-zero. + if (Major) + OutStreamer.EmitVersionMin((TT.isMacOSX() ? + MCVM_OSXVersionMin : MCVM_IOSVersionMin), + Major, Minor, Update); + } // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -192,25 +222,65 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer.AddBlankLine(); } - if (MAI->doesSupportDebugInformation()) - DD = new DwarfDebug(this, &M); + if (MAI->doesSupportDebugInformation()) { + if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) { + Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), + DbgTimerName, + CodeViewLineTablesGroupName)); + } else { + DD = new DwarfDebug(this, &M); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + } + } + DwarfException *DE = 0; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: - return false; + break; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: DE = new DwarfCFIException(this); - return false; + break; case ExceptionHandling::ARM: DE = new ARMException(this); - return false; + break; case ExceptionHandling::Win64: DE = new Win64Exception(this); + break; + } + if (DE) + Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName)); + return false; +} + +static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + if (Linkage != GlobalValue::LinkOnceODRLinkage) + return false; + + if (!MAI.hasWeakDefCanBeHiddenDirective()) + return false; + + if (GV->hasUnnamedAddr()) + return true; + + // This is only used for MachO, so right now it doesn't really matter how + // we handle alias. Revisit this once the MachO linker implements aliases. 
+ if (isa<GlobalAlias>(GV)) return false; + + // If it is a non constant variable, it needs to be uniqued across shared + // objects. + if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) { + if (!Var->isConstant()) + return false; } - llvm_unreachable("Unknown exception type."); + GlobalStatus GS; + if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) + return true; + + return false; } void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { @@ -221,29 +291,16 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateWeakLinkage: - if (MAI->getWeakDefDirective() != 0) { + if (MAI->hasWeakDefDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - bool CanBeHidden = false; - - if (Linkage == GlobalValue::LinkOnceODRLinkage) { - if (GV->hasUnnamedAddr()) { - CanBeHidden = true; - } else { - GlobalStatus GS; - if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) - CanBeHidden = true; - } - } - - if (!CanBeHidden) + if (!canBeHidden(GV, *MAI)) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); - } else if (MAI->getLinkOnceDirective() != 0) { + } else if (MAI->hasLinkOnceDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); //NOTE: linkonce is handled by the section the symbol was assigned to. @@ -252,7 +309,6 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); } return; - case GlobalValue::DLLExportLinkage: case GlobalValue::AppendingLinkage: // FIXME: appending linkage variables should go into a section of // their name or something. For now, just emit them as external. @@ -263,19 +319,22 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: - case GlobalValue::LinkerPrivateLinkage: return; case GlobalValue::AvailableExternallyLinkage: llvm_unreachable("Should never emit this"); - case GlobalValue::DLLImportLinkage: case GlobalValue::ExternalWeakLinkage: llvm_unreachable("Don't know how to emit these"); } llvm_unreachable("Unknown linkage type!"); } +void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name, + const GlobalValue *GV) const { + TM.getNameWithPrefix(Name, GV, *Mang); +} + MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { - return getObjFileLowering().getSymbol(*Mang, GV); + return TM.getSymbol(GV, *Mang); } /// EmitGlobalVariable - Emit the specified global variable to the .s file. @@ -286,7 +345,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, + GV->printAsOperand(OutStreamer.GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer.GetCommentOS() << '\n'; } @@ -311,8 +370,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // sections and expected to be contiguous (e.g. ObjC metadata). 
unsigned AlignLog = getGVAlignmentLog2(GV, *DL); - if (DD) - DD->setSymbolSize(GVSym, Size); + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); + OI.Handler->setSymbolSize(GVSym, Size); + } // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -332,7 +394,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle local BSS symbols. if (MAI->hasMachoZeroFillDirective()) { const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // .zerofill __DATA, __bss, _foo, 400, 5 OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align); return; @@ -361,7 +423,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } const MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. @@ -452,7 +514,8 @@ void AsmPrinter::EmitFunctionHeader() { // Print the 'header' of function. const Function *F = MF->getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + OutStreamer.SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); EmitLinkage(F, CurrentFnSym); @@ -462,7 +525,7 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), F, + F->printAsOperand(OutStreamer.GetCommentOS(), /*PrintType=*/false, F->getParent()); OutStreamer.GetCommentOS() << '\n'; } @@ -482,13 +545,10 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - if (DE) { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->BeginFunction(MF); - } - if (DD) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginFunction(MF); + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); + OI.Handler->beginFunction(MF); } // Emit the prefix data. @@ -576,10 +636,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { SmallString<128> Str; raw_svector_ostream OS(Str); - OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + OS << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + DIVariable V(MI->getOperand(2).getMetadata()); if (V.getContext().isSubprogram()) { StringRef Name = DISubprogram(V.getContext()).getDisplayName(); if (!Name.empty()) @@ -625,7 +684,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. - AP.OutStreamer.EmitRawText(OS.str()); + AP.OutStreamer.emitRawComment(OS.str()); return true; } if (Deref) @@ -637,7 +696,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. 
- AP.OutStreamer.EmitRawText(OS.str()); + AP.OutStreamer.emitRawComment(OS.str()); return true; } @@ -657,14 +716,11 @@ bool AsmPrinter::needsSEHMoves() { MF->getFunction()->needsUnwindTableEntry(); } -bool AsmPrinter::needsRelocationsForDwarfStringPool() const { - return MAI->doesDwarfUseRelocationsAcrossSections(); -} - -void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { - const MCSymbol *Label = MI.getOperand(0).getMCSymbol(); - - if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) +void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { + ExceptionHandling::ExceptionsType ExceptionHandlingType = + MAI->getExceptionHandlingType(); + if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && + ExceptionHandlingType != ExceptionHandling::ARM) return; if (needsCFIMoves() == CFI_M_None) @@ -675,16 +731,9 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { const MachineModuleInfo &MMI = MF->getMMI(); const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); - bool FoundOne = false; - (void)FoundOne; - for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(), - E = Instrs.end(); I != E; ++I) { - if (I->getLabel() == Label) { - emitCFIInstruction(*I); - FoundOne = true; - } - } - assert(FoundOne); + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + emitCFIInstruction(CFI); } /// EmitFunctionBody - This method emits the body and trailer for a @@ -693,7 +742,7 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); + bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); // Print out code for the function. bool HasAnyRealCode = false; @@ -707,23 +756,27 @@ void AsmPrinter::EmitFunctionBody() { LastMI = II; // Print the assembly for the instruction. - if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && + if (!II->isPosition() && !II->isImplicitDef() && !II->isKill() && !II->isDebugValue()) { HasAnyRealCode = true; ++EmittedInsts; } if (ShouldPrintDebugScopes) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginInstruction(II); + for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) { + const HandlerInfo &OI = Handlers[III]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + TimePassesIsEnabled); + OI.Handler->beginInstruction(II); + } } if (isVerbose()) emitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { - case TargetOpcode::PROLOG_LABEL: - emitPrologLabel(*II); + case TargetOpcode::CFI_INSTRUCTION: + emitCFIInstruction(*II); break; case TargetOpcode::EH_LABEL: @@ -746,16 +799,17 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) emitKill(II, *this); break; default: - if (!TM.hasMCUseLoc()) - MCLineEntry::Make(&OutStreamer, getCurrentSection()); - EmitInstruction(II); break; } if (ShouldPrintDebugScopes) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endInstruction(II); + for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) { + const HandlerInfo &OI = Handlers[III]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + TimePassesIsEnabled); + OI.Handler->endInstruction(); + } } } } @@ -765,7 +819,7 @@ void AsmPrinter::EmitFunctionBody() { // label equaling the end of function label and an invalid "row" in the // FDE. We need to emit a noop in this situation so that the FDE's rows are // valid. 
- bool RequiresNoop = LastMI && LastMI->isPrologLabel(); + bool RequiresNoop = LastMI && LastMI->isCFIInstruction(); // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the @@ -775,7 +829,7 @@ void AsmPrinter::EmitFunctionBody() { TM.getInstrInfo()->getNoopForMachoTarget(Noop); if (Noop.getOpcode()) { OutStreamer.AddComment("avoids zero-length function"); - OutStreamer.EmitInstruction(Noop); + OutStreamer.EmitInstruction(Noop, getSubtargetInfo()); } else // Target not mc-ized yet. OutStreamer.EmitRawText(StringRef("\tnop\n")); } @@ -811,14 +865,11 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - // Emit post-function debug information. - if (DD) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endFunction(MF); - } - if (DE) { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->EndFunction(); + // Emit post-function debug and/or EH information. + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); + OI.Handler->endFunction(MF); } MMI->EndFunction(); @@ -828,56 +879,6 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -/// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, - bool Indirect) const { - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - - for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0; - ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); - // FIXME: Get the bit range this register uses of the superregister - // so that we can produce a DW_OP_bit_piece - } - - // FIXME: Handle cases like a super register being encoded as - // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33 - - // FIXME: We have no reasonable way of handling errors in here. The - // caller might be in the middle of an dwarf expression. We should - // probably assert that Reg >= 0 once debug info generation is more mature. - - if (MLoc.isIndirect() || Indirect) { - if (Reg < 32) { - OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); - EmitInt8(dwarf::DW_OP_breg0 + Reg); - } else { - OutStreamer.AddComment("DW_OP_bregx"); - EmitInt8(dwarf::DW_OP_bregx); - OutStreamer.AddComment(Twine(Reg)); - EmitULEB128(Reg); - } - EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset()); - if (MLoc.isIndirect() && Indirect) - EmitInt8(dwarf::DW_OP_deref); - } else { - if (Reg < 32) { - OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - EmitInt8(dwarf::DW_OP_reg0 + Reg); - } else { - OutStreamer.AddComment("DW_OP_regx"); - EmitInt8(dwarf::DW_OP_regx); - OutStreamer.AddComment(Twine(Reg)); - EmitULEB128(Reg); - } - } - - // FIXME: Produce a DW_OP_bit_piece if we used a superregister -} - bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. 
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -901,26 +902,21 @@ bool AsmPrinter::doFinalization(Module &M) { SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); // Make sure we wrote out everything we need. OutStreamer.Flush(); // Finalize debug and EH information. - if (DE) { - { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->EndModule(); - } - delete DE; DE = 0; - } - if (DD) { - { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endModule(); - } - delete DD; DD = 0; + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + TimePassesIsEnabled); + OI.Handler->endModule(); + delete OI.Handler; } + Handlers.clear(); + DD = 0; // If the target wants to know about weak references, print them all. if (MAI->getWeakRefDirective()) { @@ -949,11 +945,7 @@ bool AsmPrinter::doFinalization(Module &M) { MCSymbol *Name = getSymbol(I); const GlobalValue *GV = I->getAliasedGlobal(); - if (GV->isDeclaration()) { - report_fatal_error(Name->getName() + - ": Target doesn't support aliases to declarations"); - } - + assert(!GV->isDeclaration()); MCSymbol *Target = getSymbol(GV); if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) @@ -1106,6 +1098,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { + const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1125,7 +1118,8 @@ void AsmPrinter::EmitJumpTableInfo() { // FIXME: this isn't the right predicate, should be based on the MCSection // for the function. F->isWeakForLinker()) { - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); + OutStreamer.SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); } else { // Otherwise, drop it in the readonly section. const MCSection *ReadOnlySection = @@ -1171,7 +1165,7 @@ void AsmPrinter::EmitJumpTableInfo() { // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) + if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. 
OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); @@ -1309,7 +1303,7 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); - if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) + if (GV) OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } @@ -1366,7 +1360,7 @@ void AsmPrinter::EmitModuleIdents(Module &M) { if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { const MDNode *N = NMD->getOperand(i); - assert(N->getNumOperands() == 1 && + assert(N->getNumOperands() == 1 && "llvm.ident metadata entry can have only one operand"); const MDString *S = cast<MDString>(N->getOperand(0)); OutStreamer.EmitIdent(S->getString()); @@ -1422,8 +1416,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, unsigned Size) - const { + const MCSymbol *Lo, + unsigned Size) const { // Emit Hi+Offset - Lo // Get the Hi+Offset expression. @@ -1452,8 +1446,8 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size, bool IsSectionRelative) - const { + unsigned Size, + bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { OutStreamer.EmitCOFFSecRel32(Label); return; @@ -1462,14 +1456,12 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); if (Offset) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(Offset, OutContext), - OutContext); + Expr = MCBinaryExpr::CreateAdd( + Expr, MCConstantExpr::Create(Offset, OutContext), OutContext); OutStreamer.EmitValue(Expr, Size); } - //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of @@ -1486,7 +1478,7 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); else - OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); + OutStreamer.EmitValueToAlignment(1 << NumBits); } //===----------------------------------------------------------------------===// @@ -1515,6 +1507,11 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { llvm_unreachable("Unknown constant value to lower!"); } + if (const MCExpr *RelocExpr = + AP.getObjFileLowering().getExecutableRelativeSymbol(CE, *AP.Mang, + AP.TM)) + return RelocExpr; + switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding @@ -1530,7 +1527,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { std::string S; raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; - WriteAsOperand(OS, CE, 
/*PrintType=*/false, + CE->printAsOperand(OS, /*PrintType=*/false, !AP.MF ? 0 : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } @@ -1994,15 +1991,17 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { /// GetTempSymbol - Return the MCSymbol corresponding to the assembler /// temporary label with the specified stem and unique ID. -MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const { - return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + +MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { + const DataLayout *DL = TM.getDataLayout(); + return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + Name + Twine(ID)); } /// GetTempSymbol - Return an assembler temporary label with the specified /// stem. -MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const { - return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+ +MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { + const DataLayout *DL = TM.getDataLayout(); + return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Name); } @@ -2017,8 +2016,9 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol - (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); } @@ -2030,21 +2030,16 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol - (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + + (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); } -/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with -/// global value name as its base, with the specified suffix, and where the -/// symbol is forced to have private linkage if ForcePrivate is true. -MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV, - StringRef Suffix, - bool ForcePrivate) const { - SmallString<60> NameStr; - Mang->getNameWithPrefix(NameStr, GV, ForcePrivate); - NameStr.append(Suffix.begin(), Suffix.end()); - return OutContext.GetOrCreateSymbol(NameStr.str()); +MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix) const { + return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang, + TM); } /// GetExternalSymbolSymbol - Return the MCSymbol for the specified @@ -2155,10 +2150,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { // Print the main label for the block. if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { + if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. 
- OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + - Twine(MBB->getNumber()) + ":"); + OutStreamer.emitRawComment(" BB#" + Twine(MBB->getNumber()) + ":", false); } } else { OutStreamer.EmitLabel(MBB->getSymbol()); @@ -2221,14 +2215,13 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { if (!MI.isBranch() || MI.isIndirectBranch()) return false; - // If we are the operands of one of the branches, this is not - // a fall through. - for (MachineInstr::mop_iterator OI = MI.operands_begin(), - OE = MI.operands_end(); OI != OE; ++OI) { - const MachineOperand& OP = *OI; - if (OP.isJTI()) + // If we are the operands of one of the branches, this is not a fall + // through. Note that targets with delay slots will usually bundle + // terminators with the delay slot instruction. + for (ConstMIBundleOperands OP(&MI); OP.isValid(); ++OP) { + if (OP->isJTI()) return false; - if (OP.isMBB() && OP.getMBB() == MBB) + if (OP->isMBB() && OP->getMBB() == MBB) return false; } } @@ -2261,3 +2254,6 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); } + +/// Pin vtable to this file. +AsmPrinterHandler::~AsmPrinterHandler() {} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index b92f49c..b696069 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" +#include "ByteStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" @@ -52,9 +54,9 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. 
void AsmPrinter::EmitCFAByte(unsigned Val) const { if (isVerbose()) { - if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64) + if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) OutStreamer.AddComment("DW_CFA_offset + Reg (" + - Twine(Val-dwarf::DW_CFA_offset) + ")"); + Twine(Val - dwarf::DW_CFA_offset) + ")"); else OutStreamer.AddComment(dwarf::CallFrameString(Val)); } @@ -63,43 +65,56 @@ void AsmPrinter::EmitCFAByte(unsigned Val) const { static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { - case dwarf::DW_EH_PE_absptr: return "absptr"; - case dwarf::DW_EH_PE_omit: return "omit"; - case dwarf::DW_EH_PE_pcrel: return "pcrel"; - case dwarf::DW_EH_PE_udata4: return "udata4"; - case dwarf::DW_EH_PE_udata8: return "udata8"; - case dwarf::DW_EH_PE_sdata4: return "sdata4"; - case dwarf::DW_EH_PE_sdata8: return "sdata8"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4 + : return "indirect pcrel udata4"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : return "indirect pcrel sdata4"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8 + : return "indirect pcrel udata8"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8 + : return "indirect pcrel sdata8"; } return "<unknown encoding>"; } - /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an /// encoding. If verbose assembly output is enabled, we output comments /// describing the encoding. Desc is an optional string saying what the /// encoding is specifying (e.g. "LSDA"). 
void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { if (isVerbose()) { - if (Desc != 0) - OutStreamer.AddComment(Twine(Desc)+" Encoding = " + + if (Desc) + OutStreamer.AddComment(Twine(Desc) + " Encoding = " + Twine(DecodeDWARFEncoding(Val))); else - OutStreamer.AddComment(Twine("Encoding = ") + - DecodeDWARFEncoding(Val)); + OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } OutStreamer.EmitIntValue(Val, 1); @@ -111,11 +126,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { return 0; switch (Encoding & 0x07) { - default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); - case dwarf::DW_EH_PE_udata2: return 2; - case dwarf::DW_EH_PE_udata4: return 4; - case dwarf::DW_EH_PE_udata8: return 8; + default: + llvm_unreachable("Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: + return TM.getDataLayout()->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; } } @@ -125,7 +145,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); + TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, OutStreamer); OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); @@ -165,6 +185,150 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, EmitLabelDifference(Label, SectionLabel, 4); } +/// Emit a dwarf register operation. +static void emitDwarfRegOp(ByteStreamer &Streamer, int Reg) { + assert(Reg >= 0); + if (Reg < 32) { + Streamer.EmitInt8(dwarf::DW_OP_reg0 + Reg, + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + } else { + Streamer.EmitInt8(dwarf::DW_OP_regx, "DW_OP_regx"); + Streamer.EmitULEB128(Reg, Twine(Reg)); + } +} + +/// Emit an (double-)indirect dwarf register operation. +static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset, + bool Deref) { + assert(Reg >= 0); + if (Reg < 32) { + Streamer.EmitInt8(dwarf::DW_OP_breg0 + Reg, + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + } else { + Streamer.EmitInt8(dwarf::DW_OP_bregx, "DW_OP_bregx"); + Streamer.EmitULEB128(Reg, Twine(Reg)); + } + Streamer.EmitSLEB128(Offset); + if (Deref) + Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); +} + +/// Emit a dwarf register operation for describing +/// - a small value occupying only part of a register or +/// - a small register representing only part of a value. +static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned Size, + unsigned Offset) { + assert(Size > 0); + if (Offset > 0) { + Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece"); + Streamer.EmitULEB128(Size, Twine(Size)); + Streamer.EmitULEB128(Offset, Twine(Offset)); + } else { + Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece"); + unsigned ByteSize = Size / 8; // Assuming 8 bits per byte. + Streamer.EmitULEB128(ByteSize, Twine(ByteSize)); + } +} + +/// Some targets do not provide a DWARF register number for every +/// register. This function attempts to emit a dwarf register by +/// emitting a piece of a super-register or by piecing together +/// multiple subregisters that alias the register. 
+static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP, + const MachineLocation &MLoc) { + assert(!MLoc.isIndirect()); + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + + // Walk up the super-register chain until we find a valid number. + // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + if (Reg >= 0) { + unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg()); + unsigned Size = TRI->getSubRegIdxSize(Idx); + unsigned Offset = TRI->getSubRegIdxOffset(Idx); + AP.OutStreamer.AddComment("super-register"); + emitDwarfRegOp(Streamer, Reg); + emitDwarfOpPiece(Streamer, Size, Offset); + return; + } + } + + // Otherwise, attempt to find a covering set of sub-register numbers. + // For example, Q0 on ARM is a composition of D0+D1. + // + // Keep track of the current position so we can emit the more + // efficient DW_OP_piece. + unsigned CurPos = 0; + // The size of the register in bits, assuming 8 bits per byte. + unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8; + // Keep track of the bits in the register we already emitted, so we + // can avoid emitting redundant aliasing subregs. + SmallBitVector Coverage(RegSize, false); + for (MCSubRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI->getSubRegIndex(MLoc.getReg(), *SR); + unsigned Size = TRI->getSubRegIdxSize(Idx); + unsigned Offset = TRI->getSubRegIdxOffset(Idx); + Reg = TRI->getDwarfRegNum(*SR, false); + + // Intersection between the bits we already emitted and the bits + // covered by this subregister. + SmallBitVector Intersection(RegSize, false); + Intersection.set(Offset, Offset + Size); + Intersection ^= Coverage; + + // If this sub-register has a DWARF number and we haven't covered + // its range, emit a DWARF piece for it. + if (Reg >= 0 && Intersection.any()) { + AP.OutStreamer.AddComment("sub-register"); + emitDwarfRegOp(Streamer, Reg); + emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); + CurPos = Offset + Size; + + // Mark it as emitted. + Coverage.set(Offset, Offset + Size); + } + } + + if (CurPos == 0) { + // FIXME: We have no reasonable way of handling errors in here. + Streamer.EmitInt8(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); + } +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, + const MachineLocation &MLoc, + bool Indirect) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + if (Reg < 0) { + // We assume that pointers are always in an addressable register. + if (Indirect || MLoc.isIndirect()) { + // FIXME: We have no reasonable way of handling errors in here. The + // caller might be in the middle of a dwarf expression. We should + // probably assert that Reg >= 0 once debug info generation is more + // mature. + Streamer.EmitInt8(dwarf::DW_OP_nop, + "nop (invalid dwarf register number for indirect loc)"); + return; + } + + // Attempt to find a valid super- or sub-register. 
+ if (!Indirect && !MLoc.isIndirect()) + return EmitDwarfRegOpPiece(Streamer, *this, MLoc); + } + + if (MLoc.isIndirect()) + emitDwarfRegOpIndirect(Streamer, Reg, MLoc.getOffset(), Indirect); + else if (Indirect) + emitDwarfRegOpIndirect(Streamer, Reg, 0, false); + else + emitDwarfRegOp(Streamer, Reg); +} + //===----------------------------------------------------------------------===// // Dwarf Lowering Routines //===----------------------------------------------------------------------===// @@ -191,5 +355,8 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpWindowSave: OutStreamer.EmitCFIWindowSave(); break; + case MCCFIInstruction::OpSameValue: + OutStreamer.EmitCFISameValue(Inst.getRegister()); + break; } } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h new file mode 100644 index 0000000..2825367 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -0,0 +1,57 @@ +//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a generic interface for AsmPrinter handlers, +// like debug and EH info emitters. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ +#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MachineFunction; +class MachineInstr; +class MCSymbol; + +/// \brief Collects and handles AsmPrinter objects required to build debug +/// or EH information. +class AsmPrinterHandler { +public: + virtual ~AsmPrinterHandler(); + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; + + /// \brief Emit all sections that should come after the content. + virtual void endModule() = 0; + + /// \brief Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + virtual void beginFunction(const MachineFunction *MF) = 0; + + /// \brief Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + virtual void endFunction(const MachineFunction *MF) = 0; + + /// \brief Process beginning of an instruction. + virtual void beginInstruction(const MachineInstr *MI) = 0; + + /// \brief Process end of an instruction. 
+ virtual void endInstruction() = 0; +}; +} // End of namespace llvm + +#endif diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4f927f6..567b6e3 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -13,12 +13,12 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -33,6 +33,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { @@ -77,11 +78,17 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, if (isNullTerminated) Str = Str.substr(0, Str.size()-1); - // If the output streamer is actually a .s file, just emit the blob textually. + // If the output streamer does not have mature MC support or the integrated + // assembler has been disabled, just emit the blob textually. + // Otherwise parse the asm and emit it via MC support. // This is useful in case the asm parser doesn't handle something but the // system assembler does. - if (OutStreamer.hasRawTextSupport()) { + const MCAsmInfo *MCAI = TM.getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + if (!MCAI->useIntegratedAssembler() && + !OutStreamer.isIntegratedAssemblerRequired()) { OutStreamer.EmitRawText(Str); + emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), 0); return; } @@ -110,20 +117,25 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr, - OutContext, OutStreamer, - *MAI)); - - // FIXME: It would be nice if we can avoid createing a new instance of - // MCSubtargetInfo here given TargetSubtargetInfo is available. However, - // we have to watch out for asm directives which can change subtarget - // state. e.g. .code 16, .code 32. - OwningPtr<MCSubtargetInfo> - STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(), - TM.getTargetCPU(), - TM.getTargetFeatureString())); - OwningPtr<MCTargetAsmParser> - TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); + + // Initialize the parser with a fresh subtarget info. It is better to use a + // new STI here because the parser may modify it and we do not want those + // modifications to persist after parsing the inlineasm. The modifications + // made by the parser will be seen by the code emitters because it passes + // the current STI down to the EncodeInstruction() method. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); + + // Preserve a copy of the original STI because the parser may modify it. For + // example, when switching between arm and thumb mode. If the target needs to + // emit code to return to the original state it can do so in + // emitInlineAsmEnd(). 
+ MCSubtargetInfo STIOrig = *STI; + + std::unique_ptr<MCTargetAsmParser> TAP( + TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -133,6 +145,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); + emitInlineAsmEnd(STIOrig, STI.get()); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -427,21 +440,14 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - // Don't emit the comments if writing to a .o file. - if (!OutStreamer.hasRawTextSupport()) return; - - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); + OutStreamer.emitRawComment(MAI->getInlineAsmStart()); + OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); return; } // Emit the #APP start marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmStart()); + // enabled, so we use emitRawComment. + OutStreamer.emitRawComment(MAI->getInlineAsmStart()); // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. @@ -476,10 +482,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't - // enabled, so we use EmitRawText. - if (OutStreamer.hasRawTextSupport()) - OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ - MAI->getInlineAsmEnd()); + // enabled, so we use emitRawComment. + OutStreamer.emitRawComment(MAI->getInlineAsmEnd()); } @@ -491,8 +495,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { + const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { - OS << MAI->getPrivateGlobalPrefix(); + OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { OS << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { @@ -551,3 +556,5 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } +void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const {} diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h new file mode 100644 index 0000000..6c01d65 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -0,0 +1,71 @@ +//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a class that can take bytes that would normally be +// streamed via the AsmPrinter. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BYTESTREAMER_H +#define LLVM_CODEGEN_BYTESTREAMER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "DIEHash.h" + +namespace llvm { +class ByteStreamer { + public: + virtual ~ByteStreamer() {} + + // For now we're just handling the calls we need for dwarf emission/hashing. + virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; + virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; + virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0; +}; + +class APByteStreamer : public ByteStreamer { +private: + AsmPrinter &AP; + +public: + APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + AP.OutStreamer.AddComment(Comment); + AP.EmitInt8(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer.AddComment(Comment); + AP.EmitSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer.AddComment(Comment); + AP.EmitULEB128(DWord); + } +}; + +class HashingByteStreamer : public ByteStreamer { + private: + DIEHash &Hash; + public: + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addULEB128(DWord); + } +}; +} + +#endif diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index be484a6..b3eddac 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -7,12 +7,13 @@ add_llvm_library(LLVMAsmPrinter DIEHash.cpp DwarfAccelTable.cpp DwarfCFIException.cpp - DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp + DwarfUnit.cpp ErlangGCPrinter.cpp OcamlGCPrinter.cpp Win64Exception.cpp + WinCodeViewLineTables.cpp ) add_dependencies(LLVMAsmPrinter intrinsics_gen) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 6944428..26e8f2d 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -13,17 +13,18 @@ #include "DIE.h" #include "DwarfDebug.h" +#include "DwarfUnit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" using namespace llvm; @@ -48,7 +49,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { /// void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(unsigned(Tag)); - ID.AddInteger(ChildrenFlag); + ID.AddInteger(unsigned(Children)); // For each attribute description. for (unsigned i = 0, N = Data.size(); i < N; ++i) @@ -62,7 +63,7 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const { AP->EmitULEB128(Tag, dwarf::TagString(Tag)); // Emit whether it has children DIEs. - AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); + AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children)); // For each attribute description. 
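The ByteStreamer interface added above decouples byte emission from its destination: the same routine can stream bytes through the AsmPrinter (APByteStreamer) or fold them into an MD5 hash (HashingByteStreamer), which is how DIEHash::hashLocList later reuses DwarfDebug::emitDebugLocEntry. A minimal illustrative caller follows; the helper name and the sample expression are assumptions, not part of the patch.

    // Sketch only: one emitter, two possible sinks.
    static void emitTinyExpr(llvm::ByteStreamer &BS) {
      BS.EmitInt8(llvm::dwarf::DW_OP_constu, "DW_OP_constu");
      BS.EmitULEB128(42, "constant operand");
    }
    // APByteStreamer Printer(Asm);   emitTinyExpr(Printer); // bytes reach the .s/.o
    // HashingByteStreamer Hasher(H); emitTinyExpr(Hasher);  // same bytes update the hash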
for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -89,7 +90,7 @@ void DIEAbbrev::print(raw_ostream &O) { << " " << dwarf::TagString(Tag) << " " - << dwarf::ChildrenString(ChildrenFlag) + << dwarf::ChildrenString(Children) << '\n'; for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -112,27 +113,28 @@ DIE::~DIE() { delete Children[i]; } -/// Climb up the parent chain to get the compile unit DIE to which this DIE +/// Climb up the parent chain to get the unit DIE to which this DIE /// belongs. -const DIE *DIE::getCompileUnit() const { - const DIE *Cu = getCompileUnitOrNull(); +const DIE *DIE::getUnit() const { + const DIE *Cu = getUnitOrNull(); assert(Cu && "We should not have orphaned DIEs."); return Cu; } -/// Climb up the parent chain to get the compile unit DIE this DIE belongs +/// Climb up the parent chain to get the unit DIE this DIE belongs /// to. Return NULL if DIE is not added to an owner yet. -const DIE *DIE::getCompileUnitOrNull() const { +const DIE *DIE::getUnitOrNull() const { const DIE *p = this; while (p) { - if (p->getTag() == dwarf::DW_TAG_compile_unit) + if (p->getTag() == dwarf::DW_TAG_compile_unit || + p->getTag() == dwarf::DW_TAG_type_unit) return p; p = p->getParent(); } return NULL; } -DIEValue *DIE::findAttribute(uint16_t Attribute) { +DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const { const SmallVectorImpl<DIEValue *> &Values = getValues(); const DIEAbbrev &Abbrevs = getAbbrev(); @@ -159,7 +161,7 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const { O << Indent << dwarf::TagString(Abbrev.getTag()) << " " - << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n"; + << dwarf::ChildrenString(Abbrev.hasChildren()) << "\n"; } else { O << "Size: " << Size << "\n"; } @@ -215,8 +217,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? 
- if (Asm->OutStreamer.hasRawTextSupport()) - Asm->OutStreamer.EmitRawText(""); + Asm->OutStreamer.AddBlankLine(); return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru @@ -227,6 +228,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: Size = 4; break; case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_ref_sig8: // Fall thru case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; @@ -253,11 +255,12 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_ref_sig8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer); + case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer); + case dwarf::DW_FORM_udata: return getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } @@ -338,6 +341,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { /// unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; return AP->getDataLayout().getPointerSize(); } @@ -378,7 +382,26 @@ void DIEString::print(raw_ostream &O) const { /// EmitValue - Emit debug information entry offset. /// void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { - AP->EmitInt32(Entry->getOffset()); + + if (Form == dwarf::DW_FORM_ref_addr) { + const DwarfDebug *DD = AP->getDwarfDebug(); + unsigned Addr = Entry->getOffset(); + assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Entry->getOffset() returns the offset from start of the + // compile unit. + DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit()); + assert(CU && "CUDie should belong to a CU."); + Addr += CU->getDebugInfoOffset(); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, + DIEEntry::getRefAddrSize(AP)); + else + AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr, + CU->getSectionSym(), + DIEEntry::getRefAddrSize(AP)); + } else + AP->EmitInt32(Entry->getOffset()); } unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { @@ -386,7 +409,9 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { // specified to be four bytes in the DWARF 32-bit format and eight bytes // in the DWARF 64-bit format, while DWARF Version 2 specifies that such // references have the same size as an address on the target system. 
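A worked example of the DW_FORM_ref_addr offset computation in DIEEntry::EmitValue above (numbers are illustrative): if the referenced DIE sits at offset 0x54 within its compile unit and that unit's DIEs begin at offset 0x230 in .debug_info, the emitted value is 0x230 + 0x54 = 0x284, written either as a label-plus-offset against the unit's section symbol when cross-section relocations are available, or as a plain label difference otherwise.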
- if (AP->getDwarfDebug()->getDwarfVersion() == 2) + const DwarfDebug *DD = AP->getDwarfDebug(); + assert(DD && "Expected Dwarf Debug info to be available"); + if (DD->getDwarfVersion() == 2) return AP->getDataLayout().getPointerSize(); return sizeof(int32_t); } @@ -398,12 +423,83 @@ void DIEEntry::print(raw_ostream &O) const { #endif //===----------------------------------------------------------------------===// +// DIETypeSignature Implementation +//===----------------------------------------------------------------------===// +void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { + assert(Form == dwarf::DW_FORM_ref_sig8); + Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8); +} + +#ifndef NDEBUG +void DIETypeSignature::print(raw_ostream &O) const { + O << format("Type Unit: 0x%lx", Unit.getTypeSignature()); +} + +void DIETypeSignature::dump() const { print(dbgs()); } +#endif + +//===----------------------------------------------------------------------===// +// DIELoc Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the location expression. +/// +unsigned DIELoc::ComputeSize(AsmPrinter *AP) const { + if (!Size) { + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); + } + + return Size; +} + +/// EmitValue - Emit location data. +/// +void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + Asm->EmitULEB128(Size); break; + } + + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); + for (unsigned i = 0, N = Values.size(); i < N; ++i) + Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); +} + +/// SizeOf - Determine size of location data in bytes. +/// +unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + return Size + getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); + } +} + +#ifndef NDEBUG +void DIELoc::print(raw_ostream &O) const { + O << "ExprLoc: "; + DIE::print(O, 5); +} +#endif + +//===----------------------------------------------------------------------===// // DIEBlock Implementation //===----------------------------------------------------------------------===// /// ComputeSize - calculate the size of the block. 
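A worked example of the DIELoc size rules just shown (illustrative numbers): an expression whose operands total 9 bytes occupies 1 + 9 = 10 bytes under DW_FORM_block1, 2 + 9 = 11 under DW_FORM_block2, 4 + 9 = 13 under DW_FORM_block4, and getULEB128Size(9) + 9 = 1 + 9 = 10 under DW_FORM_block or DW_FORM_exprloc; for DWARF 4 and later, DIELoc::BestForm (in DIE.h below) picks DW_FORM_exprloc regardless of size.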
/// -unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { +unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const { if (!Size) { const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -436,7 +532,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); - case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); default: llvm_unreachable("Improper form for block"); } } @@ -447,3 +543,34 @@ void DIEBlock::print(raw_ostream &O) const { DIE::print(O, 5); } #endif + +//===----------------------------------------------------------------------===// +// DIELocList Implementation +//===----------------------------------------------------------------------===// + +unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) + return 4; + if (Form == dwarf::DW_FORM_sec_offset) + return 4; + return AP->getDataLayout().getPointerSize(); +} + +/// EmitValue - Emit label value. +/// +void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { + DwarfDebug *DD = AP->getDwarfDebug(); + MCSymbol *Label = DD->getDebugLocEntries()[Index].Label; + + if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf()) + AP->EmitSectionOffset(Label, DD->getDebugLocSym()); + else + AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4); +} + +#ifndef NDEBUG +void DIELocList::print(raw_ostream &O) const { + O << "LocList: " << Index; + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index f4fa326..7fefd4f 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -16,437 +16,560 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" -#include "llvm/MC/MCExpr.h" #include <vector> namespace llvm { - class AsmPrinter; - class MCSymbol; - class MCSymbolRefExpr; - class raw_ostream; - - //===--------------------------------------------------------------------===// - /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a - /// Dwarf abbreviation. - class DIEAbbrevData { - /// Attribute - Dwarf attribute code. - /// - dwarf::Attribute Attribute; - - /// Form - Dwarf form code. - /// - dwarf::Form Form; - public: - DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} - - // Accessors. - dwarf::Attribute getAttribute() const { return Attribute; } - dwarf::Form getForm() const { return Form; } - - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; - }; +class AsmPrinter; +class MCExpr; +class MCSymbol; +class raw_ostream; +class DwarfTypeUnit; + +//===--------------------------------------------------------------------===// +/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a +/// Dwarf abbreviation. +class DIEAbbrevData { + /// Attribute - Dwarf attribute code. + /// + dwarf::Attribute Attribute; - //===--------------------------------------------------------------------===// - /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug - /// information object. - class DIEAbbrev : public FoldingSetNode { - /// Tag - Dwarf tag code. 
- /// - dwarf::Tag Tag; - - /// ChildrenFlag - Dwarf children flag. - /// - uint16_t ChildrenFlag; - - /// Unique number for node. - /// - unsigned Number; - - /// Data - Raw data bytes for abbreviation. - /// - SmallVector<DIEAbbrevData, 12> Data; - - public: - DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} - - // Accessors. - dwarf::Tag getTag() const { return Tag; } - unsigned getNumber() const { return Number; } - uint16_t getChildrenFlag() const { return ChildrenFlag; } - const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } - void setNumber(unsigned N) { Number = N; } - - /// AddAttribute - Adds another set of attribute information to the - /// abbreviation. - void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { - Data.push_back(DIEAbbrevData(Attribute, Form)); - } + /// Form - Dwarf form code. + /// + dwarf::Form Form; + +public: + DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} + + // Accessors. + dwarf::Attribute getAttribute() const { return Attribute; } + dwarf::Form getForm() const { return Form; } + + /// Profile - Used to gather unique data for the abbreviation folding set. + /// + void Profile(FoldingSetNodeID &ID) const; +}; + +//===--------------------------------------------------------------------===// +/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug +/// information object. +class DIEAbbrev : public FoldingSetNode { + /// Unique number for node. + /// + unsigned Number; - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; + /// Tag - Dwarf tag code. + /// + dwarf::Tag Tag; - /// Emit - Print the abbreviation using the specified asm printer. - /// - void Emit(AsmPrinter *AP) const; + /// Children - Whether or not this node has children. + /// + // This cheats a bit in all of the uses since the values in the standard + // are 0 and 1 for no children and children respectively. + bool Children; + + /// Data - Raw data bytes for abbreviation. + /// + SmallVector<DIEAbbrevData, 12> Data; + +public: + DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {} + + // Accessors. + dwarf::Tag getTag() const { return Tag; } + unsigned getNumber() const { return Number; } + bool hasChildren() const { return Children; } + const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } + void setChildrenFlag(bool hasChild) { Children = hasChild; } + void setNumber(unsigned N) { Number = N; } + + /// AddAttribute - Adds another set of attribute information to the + /// abbreviation. + void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { + Data.push_back(DIEAbbrevData(Attribute, Form)); + } + + /// Profile - Used to gather unique data for the abbreviation folding set. + /// + void Profile(FoldingSetNodeID &ID) const; + + /// Emit - Print the abbreviation using the specified asm printer. + /// + void Emit(AsmPrinter *AP) const; #ifndef NDEBUG - void print(raw_ostream &O); - void dump(); + void print(raw_ostream &O); + void dump(); #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIE - A structured debug information entry. Has an abbreviation which - /// describes its organization. - class DIEValue; - - class DIE { - protected: - /// Offset - Offset in debug info section. - /// - unsigned Offset; - - /// Size - Size of instance + children. 
- /// - unsigned Size; - - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - - /// Children DIEs. - /// - std::vector<DIE *> Children; - - DIE *Parent; - - /// Attribute values. - /// - SmallVector<DIEValue*, 12> Values; - - public: - explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), - Parent(0) {} - virtual ~DIE(); - - // Accessors. - DIEAbbrev &getAbbrev() { return Abbrev; } - const DIEAbbrev &getAbbrev() const { return Abbrev; } - unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } - dwarf::Tag getTag() const { return Abbrev.getTag(); } - unsigned getOffset() const { return Offset; } - unsigned getSize() const { return Size; } - const std::vector<DIE *> &getChildren() const { return Children; } - const SmallVectorImpl<DIEValue*> &getValues() const { return Values; } - DIE *getParent() const { return Parent; } - /// Climb up the parent chain to get the compile unit DIE this DIE belongs - /// to. - const DIE *getCompileUnit() const; - /// Similar to getCompileUnit, returns null when DIE is not added to an - /// owner yet. - const DIE *getCompileUnitOrNull() const; - void setOffset(unsigned O) { Offset = O; } - void setSize(unsigned S) { Size = S; } - - /// addValue - Add a value and attributes to a DIE. - /// - void addValue(dwarf::Attribute Attribute, dwarf::Form Form, - DIEValue *Value) { - Abbrev.AddAttribute(Attribute, Form); - Values.push_back(Value); - } +//===--------------------------------------------------------------------===// +/// DIE - A structured debug information entry. Has an abbreviation which +/// describes its organization. +class DIEValue; - /// addChild - Add a child to the DIE. - /// - void addChild(DIE *Child) { - assert(!Child->getParent()); - Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); - Children.push_back(Child); - Child->Parent = this; - } +class DIE { +protected: + /// Offset - Offset in debug info section. + /// + unsigned Offset; + + /// Size - Size of instance + children. + /// + unsigned Size; + + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + + /// Children DIEs. + /// + std::vector<DIE *> Children; + + DIE *Parent; - /// findAttribute - Find a value in the DIE with the attribute given, returns NULL - /// if no such attribute exists. - DIEValue *findAttribute(uint16_t Attribute); + /// Attribute values. + /// + SmallVector<DIEValue *, 12> Values; + +public: + explicit DIE(unsigned Tag) + : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), + Parent(0) {} + ~DIE(); + + // Accessors. + DIEAbbrev &getAbbrev() { return Abbrev; } + const DIEAbbrev &getAbbrev() const { return Abbrev; } + unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } + dwarf::Tag getTag() const { return Abbrev.getTag(); } + unsigned getOffset() const { return Offset; } + unsigned getSize() const { return Size; } + const std::vector<DIE *> &getChildren() const { return Children; } + const SmallVectorImpl<DIEValue *> &getValues() const { return Values; } + DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile or type unit DIE this DIE + /// belongs to. + const DIE *getUnit() const; + /// Similar to getUnit, returns null when DIE is not added to an + /// owner yet. + const DIE *getUnitOrNull() const; + void setOffset(unsigned O) { Offset = O; } + void setSize(unsigned S) { Size = S; } + + /// addValue - Add a value and attributes to a DIE. 
+ /// + void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) { + Abbrev.AddAttribute(Attribute, Form); + Values.push_back(Value); + } + + /// addChild - Add a child to the DIE. + /// + void addChild(DIE *Child) { + assert(!Child->getParent()); + Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); + Children.push_back(Child); + Child->Parent = this; + } + + /// findAttribute - Find a value in the DIE with the attribute given, + /// returns NULL if no such attribute exists. + DIEValue *findAttribute(dwarf::Attribute Attribute) const; #ifndef NDEBUG - void print(raw_ostream &O, unsigned IndentCount = 0) const; - void dump(); + void print(raw_ostream &O, unsigned IndentCount = 0) const; + void dump(); #endif +}; + +//===--------------------------------------------------------------------===// +/// DIEValue - A debug information entry value. Some of these roughly correlate +/// to DWARF attribute classes. +/// +class DIEValue { + virtual void anchor(); + +public: + enum Type { + isInteger, + isString, + isExpr, + isLabel, + isDelta, + isEntry, + isTypeSignature, + isBlock, + isLoc, + isLocList, }; - //===--------------------------------------------------------------------===// - /// DIEValue - A debug information entry value. - /// - class DIEValue { - virtual void anchor(); - public: - enum { - isInteger, - isString, - isExpr, - isLabel, - isDelta, - isEntry, - isBlock - }; - protected: - /// Type - Type of data stored in the value. - /// - unsigned Type; - public: - explicit DIEValue(unsigned T) : Type(T) {} - virtual ~DIEValue() {} - - // Accessors - unsigned getType() const { return Type; } - - /// EmitValue - Emit value via the Dwarf writer. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; - - /// SizeOf - Return the size of a value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; +protected: + /// Ty - Type of data stored in the value. + /// + Type Ty; + + explicit DIEValue(Type T) : Ty(T) {} + virtual ~DIEValue() {} + +public: + // Accessors + Type getType() const { return Ty; } + + /// EmitValue - Emit value via the Dwarf writer. + /// + virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; + + /// SizeOf - Return the size of a value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; #ifndef NDEBUG - virtual void print(raw_ostream &O) const = 0; - void dump() const; + virtual void print(raw_ostream &O) const = 0; + void dump() const; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIEInteger - An integer value DIE. +/// +class DIEInteger : public DIEValue { + uint64_t Integer; - //===--------------------------------------------------------------------===// - /// DIEInteger - An integer value DIE. - /// - class DIEInteger : public DIEValue { - uint64_t Integer; - public: - explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} - - /// BestForm - Choose the best form for integer. 
- /// - static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { - if (IsSigned) { - const int64_t SignedInt = Int; - if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; - if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; - if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; - } else { - if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; - if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; - if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; - } - return dwarf::DW_FORM_data8; +public: + explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} + + /// BestForm - Choose the best form for integer. + /// + static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) + return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) + return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) + return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) + return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) + return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) + return dwarf::DW_FORM_data4; } + return dwarf::DW_FORM_data8; + } - /// EmitValue - Emit integer of appropriate size. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// EmitValue - Emit integer of appropriate size. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - uint64_t getValue() const { return Integer; } + uint64_t getValue() const { return Integer; } - /// SizeOf - Determine size of integer value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// SizeOf - Determine size of integer value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIEExpr - An expression DIE. - // - class DIEExpr : public DIEValue { - const MCExpr *Expr; - public: - explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} +//===--------------------------------------------------------------------===// +/// DIEExpr - An expression DIE. +// +class DIEExpr : public DIEValue { + const MCExpr *Expr; - /// EmitValue - Emit expression value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; +public: + explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} - /// getValue - Get MCExpr. - /// - const MCExpr *getValue() const { return Expr; } + /// EmitValue - Emit expression value. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - /// SizeOf - Determine size of expression value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// getValue - Get MCExpr. + /// + const MCExpr *getValue() const { return Expr; } + + /// SizeOf - Determine size of expression value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isExpr; } + // Implement isa/cast/dyncast. 
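The form selection in DIEInteger::BestForm above can be mirrored by the standalone sketch below; the function name and string return values are illustrative, and fixed-width casts stand in for the original char/short/int casts.

    #include <cstdint>
    #include <cstdio>

    // Sketch only: pick the smallest DW_FORM_data* that can hold the value.
    static const char *bestFormName(bool IsSigned, uint64_t Int) {
      if (IsSigned) {
        const int64_t S = (int64_t)Int;
        if ((int8_t)Int == S)  return "DW_FORM_data1";
        if ((int16_t)Int == S) return "DW_FORM_data2";
        if ((int32_t)Int == S) return "DW_FORM_data4";
      } else {
        if ((uint8_t)Int == Int)  return "DW_FORM_data1";
        if ((uint16_t)Int == Int) return "DW_FORM_data2";
        if ((uint32_t)Int == Int) return "DW_FORM_data4";
      }
      return "DW_FORM_data8";
    }

    int main() {
      std::printf("%s\n", bestFormName(false, 0x7f));        // DW_FORM_data1
      std::printf("%s\n", bestFormName(false, 0x1234));      // DW_FORM_data2
      std::printf("%s\n", bestFormName(true, (uint64_t)-1)); // DW_FORM_data1
      std::printf("%s\n", bestFormName(false, 1ULL << 40));  // DW_FORM_data8
    }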
+ static bool classof(const DIEValue *E) { return E->getType() == isExpr; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIELabel - A label DIE. +// +class DIELabel : public DIEValue { + const MCSymbol *Label; - //===--------------------------------------------------------------------===// - /// DIELabel - A label DIE. - // - class DIELabel : public DIEValue { - const MCSymbol *Label; - public: - explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} +public: + explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} - /// EmitValue - Emit label value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// EmitValue - Emit label value. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - /// getValue - Get MCSymbol. - /// - const MCSymbol *getValue() const { return Label; } + /// getValue - Get MCSymbol. + /// + const MCSymbol *getValue() const { return Label; } - /// SizeOf - Determine size of label value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// SizeOf - Determine size of label value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *L) { return L->getType() == isLabel; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIEDelta - A simple label difference DIE. - /// - class DIEDelta : public DIEValue { - const MCSymbol *LabelHi; - const MCSymbol *LabelLo; - public: - DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) +//===--------------------------------------------------------------------===// +/// DIEDelta - A simple label difference DIE. +/// +class DIEDelta : public DIEValue { + const MCSymbol *LabelHi; + const MCSymbol *LabelLo; + +public: + DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} - /// EmitValue - Emit delta value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// EmitValue - Emit delta value. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - /// SizeOf - Determine size of delta value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// SizeOf - Determine size of delta value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isDelta; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIEString - A container for string values. 
+/// +class DIEString : public DIEValue { + const DIEValue *Access; + const StringRef Str; + +public: + DIEString(const DIEValue *Acc, const StringRef S) + : DIEValue(isString), Access(Acc), Str(S) {} - //===--------------------------------------------------------------------===// - /// DIEString - A container for string values. + /// getString - Grab the string out of the object. + StringRef getString() const { return Str; } + + /// EmitValue - Emit delta value. /// - class DIEString : public DIEValue { - const DIEValue *Access; - const StringRef Str; + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - public: - DIEString(const DIEValue *Acc, const StringRef S) - : DIEValue(isString), Access(Acc), Str(S) {} + /// SizeOf - Determine size of delta value in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - /// getString - Grab the string out of the object. - StringRef getString() const { return Str; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *D) { return D->getType() == isString; } - /// EmitValue - Emit delta value. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; +#ifndef NDEBUG + void print(raw_ostream &O) const override; +#endif +}; - /// SizeOf - Determine size of delta value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; +//===--------------------------------------------------------------------===// +/// DIEEntry - A pointer to another debug information entry. An instance of +/// this class can also be used as a proxy for a debug information entry not +/// yet defined (ie. types.) +class DIEEntry : public DIEValue { + DIE *const Entry; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isString; } +public: + explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { + assert(E && "Cannot construct a DIEEntry with a null DIE"); + } - #ifndef NDEBUG - virtual void print(raw_ostream &O) const; - #endif - }; + DIE *getEntry() const { return Entry; } - //===--------------------------------------------------------------------===// - /// DIEEntry - A pointer to another debug information entry. An instance of - /// this class can also be used as a proxy for a debug information entry not - /// yet defined (ie. types.) - class DIEEntry : public DIEValue { - DIE *const Entry; - public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { - assert(E && "Cannot construct a DIEEntry with a null DIE"); - } + /// EmitValue - Emit debug information entry offset. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - DIE *getEntry() const { return Entry; } + /// SizeOf - Determine size of debug information entry in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { + return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) + : sizeof(int32_t); + } - /// EmitValue - Emit debug information entry offset. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// Returns size of a ref_addr entry. + static unsigned getRefAddrSize(AsmPrinter *AP); - /// SizeOf - Determine size of debug information entry in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const { - return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) - : sizeof(int32_t); - } + // Implement isa/cast/dyncast. 
+ static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + +#ifndef NDEBUG + void print(raw_ostream &O) const override; +#endif +}; + +//===--------------------------------------------------------------------===// +/// \brief A signature reference to a type unit. +class DIETypeSignature : public DIEValue { + const DwarfTypeUnit &Unit; + +public: + explicit DIETypeSignature(const DwarfTypeUnit &Unit) + : DIEValue(isTypeSignature), Unit(Unit) {} + + /// \brief Emit type unit signature. + void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override; + + /// Returns size of a ref_sig8 entry. + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { + assert(Form == dwarf::DW_FORM_ref_sig8); + return 8; + } + + // \brief Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { + return E->getType() == isTypeSignature; + } +#ifndef NDEBUG + void print(raw_ostream &O) const override; + void dump() const; +#endif +}; + +//===--------------------------------------------------------------------===// +/// DIELoc - Represents an expression location. +// +class DIELoc : public DIEValue, public DIE { + mutable unsigned Size; // Size in bytes excluding size header. +public: + DIELoc() : DIEValue(isLoc), DIE(0), Size(0) {} - /// Returns size of a ref_addr entry. - static unsigned getRefAddrSize(AsmPrinter *AP); + /// ComputeSize - Calculate the size of the location expression. + /// + unsigned ComputeSize(AsmPrinter *AP) const; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isEntry; } + /// BestForm - Choose the best form for data. + /// + dwarf::Form BestForm(unsigned DwarfVersion) const { + if (DwarfVersion > 3) + return dwarf::DW_FORM_exprloc; + // Pre-DWARF4 location expressions were blocks and not exprloc. + if ((unsigned char)Size == Size) + return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) + return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) + return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit location data. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; + + /// SizeOf - Determine size of location data in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isLoc; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; - //===--------------------------------------------------------------------===// - /// DIEBlock - A block of values. Primarily used for location expressions. - // - class DIEBlock : public DIEValue, public DIE { - unsigned Size; // Size in bytes excluding size header. - public: - DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} - - /// ComputeSize - calculate the size of the block. - /// - unsigned ComputeSize(AsmPrinter *AP); - - /// BestForm - Choose the best form for data. - /// - dwarf::Form BestForm() const { - if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1; - if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2; - if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4; - return dwarf::DW_FORM_block; - } +//===--------------------------------------------------------------------===// +/// DIEBlock - Represents a block of values. +// +class DIEBlock : public DIEValue, public DIE { + mutable unsigned Size; // Size in bytes excluding size header. 
+public: + DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} - /// EmitValue - Emit block data. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const; + /// ComputeSize - Calculate the size of the location expression. + /// + unsigned ComputeSize(AsmPrinter *AP) const; - /// SizeOf - Determine size of block data in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const; + /// BestForm - Choose the best form for data. + /// + dwarf::Form BestForm() const { + if ((unsigned char)Size == Size) + return dwarf::DW_FORM_block1; + if ((unsigned short)Size == Size) + return dwarf::DW_FORM_block2; + if ((unsigned int)Size == Size) + return dwarf::DW_FORM_block4; + return dwarf::DW_FORM_block; + } + + /// EmitValue - Emit location data. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; + + /// SizeOf - Determine size of location data in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isBlock; } + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; #endif - }; +}; + +//===--------------------------------------------------------------------===// +/// DIELocList - Represents a pointer to a location list in the debug_loc +/// section. +// +class DIELocList : public DIEValue { + // Index into the .debug_loc vector. + size_t Index; + +public: + DIELocList(size_t I) : DIEValue(isLocList), Index(I) {} + + /// getValue - Grab the current index out. + size_t getValue() const { return Index; } + + /// EmitValue - Emit location data. + /// + void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; + + /// SizeOf - Determine size of location data in bytes. + /// + unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *E) { return E->getType() == isLocList; } + +#ifndef NDEBUG + void print(raw_ostream &O) const override; +#endif +}; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index 95eca90..74beec1 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -13,12 +13,13 @@ #define DEBUG_TYPE "dwarfdebug" +#include "ByteStreamer.h" #include "DIEHash.h" - #include "DIE.h" -#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" @@ -75,7 +76,7 @@ void DIEHash::addSLEB128(int64_t Value) { do { uint8_t Byte = Value & 0x7f; Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || + More = !((((Value == 0) && ((Byte & 0x40) == 0)) || ((Value == -1) && ((Byte & 0x40) != 0)))); if (More) Byte |= 0x80; // Mark this byte to show that more bytes will follow. @@ -92,10 +93,12 @@ void DIEHash::addParentContext(const DIE &Parent) { // outermost such construct... 
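The SLEB128 loop in DIEHash::addSLEB128 above (with its whitespace cleanup) can be exercised standalone; the sketch below is illustrative, not the patch's code.

    #include <cstdint>
    #include <vector>

    // Sketch only: emit 7 bits per byte, low bits first, and stop once the
    // remaining value is pure sign extension of the byte just emitted.
    static std::vector<uint8_t> encodeSLEB128(int64_t Value) {
      std::vector<uint8_t> Out;
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7; // arithmetic shift keeps the sign
        More = !(((Value == 0) && ((Byte & 0x40) == 0)) ||
                 ((Value == -1) && ((Byte & 0x40) != 0)));
        if (More)
          Byte |= 0x80; // mark that more bytes follow
        Out.push_back(Byte);
      } while (More);
      return Out;
    }

    // encodeSLEB128(-2)  -> { 0x7e }
    // encodeSLEB128(129) -> { 0x81, 0x01 }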
SmallVector<const DIE *, 1> Parents; const DIE *Cur = &Parent; - while (Cur->getTag() != dwarf::DW_TAG_compile_unit) { + while (Cur->getParent()) { Parents.push_back(Cur); Cur = Cur->getParent(); } + assert(Cur->getTag() == dwarf::DW_TAG_compile_unit || + Cur->getTag() == dwarf::DW_TAG_type_unit); // Reverse iterate over our list to go from the outermost construct to the // innermost. @@ -134,55 +137,55 @@ void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) << " added.\n"); switch (Abbrevs.getData()[i].getAttribute()) { - COLLECT_ATTR(DW_AT_name); - COLLECT_ATTR(DW_AT_accessibility); - COLLECT_ATTR(DW_AT_address_class); - COLLECT_ATTR(DW_AT_allocated); - COLLECT_ATTR(DW_AT_artificial); - COLLECT_ATTR(DW_AT_associated); - COLLECT_ATTR(DW_AT_binary_scale); - COLLECT_ATTR(DW_AT_bit_offset); - COLLECT_ATTR(DW_AT_bit_size); - COLLECT_ATTR(DW_AT_bit_stride); - COLLECT_ATTR(DW_AT_byte_size); - COLLECT_ATTR(DW_AT_byte_stride); - COLLECT_ATTR(DW_AT_const_expr); - COLLECT_ATTR(DW_AT_const_value); - COLLECT_ATTR(DW_AT_containing_type); - COLLECT_ATTR(DW_AT_count); - COLLECT_ATTR(DW_AT_data_bit_offset); - COLLECT_ATTR(DW_AT_data_location); - COLLECT_ATTR(DW_AT_data_member_location); - COLLECT_ATTR(DW_AT_decimal_scale); - COLLECT_ATTR(DW_AT_decimal_sign); - COLLECT_ATTR(DW_AT_default_value); - COLLECT_ATTR(DW_AT_digit_count); - COLLECT_ATTR(DW_AT_discr); - COLLECT_ATTR(DW_AT_discr_list); - COLLECT_ATTR(DW_AT_discr_value); - COLLECT_ATTR(DW_AT_encoding); - COLLECT_ATTR(DW_AT_enum_class); - COLLECT_ATTR(DW_AT_endianity); - COLLECT_ATTR(DW_AT_explicit); - COLLECT_ATTR(DW_AT_is_optional); - COLLECT_ATTR(DW_AT_location); - COLLECT_ATTR(DW_AT_lower_bound); - COLLECT_ATTR(DW_AT_mutable); - COLLECT_ATTR(DW_AT_ordering); - COLLECT_ATTR(DW_AT_picture_string); - COLLECT_ATTR(DW_AT_prototyped); - COLLECT_ATTR(DW_AT_small); - COLLECT_ATTR(DW_AT_segment); - COLLECT_ATTR(DW_AT_string_length); - COLLECT_ATTR(DW_AT_threads_scaled); - COLLECT_ATTR(DW_AT_upper_bound); - COLLECT_ATTR(DW_AT_use_location); - COLLECT_ATTR(DW_AT_use_UTF8); - COLLECT_ATTR(DW_AT_variable_parameter); - COLLECT_ATTR(DW_AT_virtuality); - COLLECT_ATTR(DW_AT_visibility); - COLLECT_ATTR(DW_AT_vtable_elem_location); - COLLECT_ATTR(DW_AT_type); + COLLECT_ATTR(DW_AT_name); + COLLECT_ATTR(DW_AT_accessibility); + COLLECT_ATTR(DW_AT_address_class); + COLLECT_ATTR(DW_AT_allocated); + COLLECT_ATTR(DW_AT_artificial); + COLLECT_ATTR(DW_AT_associated); + COLLECT_ATTR(DW_AT_binary_scale); + COLLECT_ATTR(DW_AT_bit_offset); + COLLECT_ATTR(DW_AT_bit_size); + COLLECT_ATTR(DW_AT_bit_stride); + COLLECT_ATTR(DW_AT_byte_size); + COLLECT_ATTR(DW_AT_byte_stride); + COLLECT_ATTR(DW_AT_const_expr); + COLLECT_ATTR(DW_AT_const_value); + COLLECT_ATTR(DW_AT_containing_type); + COLLECT_ATTR(DW_AT_count); + COLLECT_ATTR(DW_AT_data_bit_offset); + COLLECT_ATTR(DW_AT_data_location); + COLLECT_ATTR(DW_AT_data_member_location); + COLLECT_ATTR(DW_AT_decimal_scale); + COLLECT_ATTR(DW_AT_decimal_sign); + COLLECT_ATTR(DW_AT_default_value); + COLLECT_ATTR(DW_AT_digit_count); + COLLECT_ATTR(DW_AT_discr); + COLLECT_ATTR(DW_AT_discr_list); + COLLECT_ATTR(DW_AT_discr_value); + COLLECT_ATTR(DW_AT_encoding); + COLLECT_ATTR(DW_AT_enum_class); + COLLECT_ATTR(DW_AT_endianity); + COLLECT_ATTR(DW_AT_explicit); + COLLECT_ATTR(DW_AT_is_optional); + COLLECT_ATTR(DW_AT_location); + COLLECT_ATTR(DW_AT_lower_bound); + COLLECT_ATTR(DW_AT_mutable); + COLLECT_ATTR(DW_AT_ordering); + COLLECT_ATTR(DW_AT_picture_string); + 
COLLECT_ATTR(DW_AT_prototyped); + COLLECT_ATTR(DW_AT_small); + COLLECT_ATTR(DW_AT_segment); + COLLECT_ATTR(DW_AT_string_length); + COLLECT_ATTR(DW_AT_threads_scaled); + COLLECT_ATTR(DW_AT_upper_bound); + COLLECT_ATTR(DW_AT_use_location); + COLLECT_ATTR(DW_AT_use_UTF8); + COLLECT_ATTR(DW_AT_variable_parameter); + COLLECT_ATTR(DW_AT_virtuality); + COLLECT_ATTR(DW_AT_visibility); + COLLECT_ATTR(DW_AT_vtable_elem_location); + COLLECT_ATTR(DW_AT_type); default: break; } @@ -269,6 +272,24 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, computeHash(Entry); } +// Hash all of the values in a block like set of values. This assumes that +// all of the data is going to be added as integers. +void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) { + for (SmallVectorImpl<DIEValue *>::const_iterator I = Values.begin(), + E = Values.end(); + I != E; ++I) + Hash.update((uint64_t)cast<DIEInteger>(*I)->getValue()); +} + +// Hash the contents of a loclistptr class. +void DIEHash::hashLocList(const DIELocList &LocList) { + HashingByteStreamer Streamer(*this); + DwarfDebug &DD = *AP->getDwarfDebug(); + for (const auto &Entry : + DD.getDebugLocEntries()[LocList.getValue()].List) + DD.emitDebugLocEntry(Streamer, Entry); +} + // Hash an individual attribute \param Attr based on the type of attribute and // the form. void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { @@ -276,43 +297,76 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { const DIEAbbrevData *Desc = Attr.Desc; dwarf::Attribute Attribute = Desc->getAttribute(); - // 7.27 Step 3 - // ... An attribute that refers to another type entry T is processed as - // follows: - if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) { - hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry()); - return; + // Other attribute values use the letter 'A' as the marker, and the value + // consists of the form code (encoded as an unsigned LEB128 value) followed by + // the encoding of the value according to the form code. To ensure + // reproducibility of the signature, the set of forms used in the signature + // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag, + // DW_FORM_string, and DW_FORM_block. + + switch (Value->getType()) { + // 7.27 Step 3 + // ... An attribute that refers to another type entry T is processed as + // follows: + case DIEValue::isEntry: + hashDIEEntry(Attribute, Tag, *cast<DIEEntry>(Value)->getEntry()); + break; + case DIEValue::isInteger: { + addULEB128('A'); + addULEB128(Attribute); + switch (Desc->getForm()) { + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_udata: + case dwarf::DW_FORM_sdata: + addULEB128(dwarf::DW_FORM_sdata); + addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + // DW_FORM_flag_present is just flag with a value of one. We still give it a + // value so just use the value. + case dwarf::DW_FORM_flag_present: + case dwarf::DW_FORM_flag: + addULEB128(dwarf::DW_FORM_flag); + addULEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + break; + default: + llvm_unreachable("Unknown integer form!"); + } + break; } - - // Other attribute values use the letter 'A' as the marker, ... - addULEB128('A'); - - addULEB128(Attribute); - - // ... and the value consists of the form code (encoded as an unsigned LEB128 - // value) followed by the encoding of the value according to the form code. 
To - // ensure reproducibility of the signature, the set of forms used in the - // signature computation is limited to the following: DW_FORM_sdata, - // DW_FORM_flag, DW_FORM_string, and DW_FORM_block. - switch (Desc->getForm()) { - case dwarf::DW_FORM_string: - llvm_unreachable( - "Add support for DW_FORM_string if we ever start emitting them again"); - case dwarf::DW_FORM_GNU_str_index: - case dwarf::DW_FORM_strp: + case DIEValue::isString: + addULEB128('A'); + addULEB128(Attribute); addULEB128(dwarf::DW_FORM_string); addString(cast<DIEString>(Value)->getString()); break; - case dwarf::DW_FORM_data1: - case dwarf::DW_FORM_data2: - case dwarf::DW_FORM_data4: - case dwarf::DW_FORM_data8: - case dwarf::DW_FORM_udata: - addULEB128(dwarf::DW_FORM_sdata); - addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue()); + case DIEValue::isBlock: + case DIEValue::isLoc: + case DIEValue::isLocList: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_block); + if (isa<DIEBlock>(Value)) { + addULEB128(cast<DIEBlock>(Value)->ComputeSize(AP)); + hashBlockData(cast<DIEBlock>(Value)->getValues()); + } else if (isa<DIELoc>(Value)) { + addULEB128(cast<DIELoc>(Value)->ComputeSize(AP)); + hashBlockData(cast<DIELoc>(Value)->getValues()); + } else { + // We could add the block length, but that would take + // a bit of work and not add a lot of uniqueness + // to the hash in some way we could test. + hashLocList(*cast<DIELocList>(Value)); + } break; - default: - llvm_unreachable("Add support for additional forms"); + // FIXME: It's uncertain whether or not we should handle this at the moment. + case DIEValue::isExpr: + case DIEValue::isLabel: + case DIEValue::isDelta: + case DIEValue::isTypeSignature: + llvm_unreachable("Add support for additional value types."); } } diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index f0c4ef9..48f1601 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -11,17 +11,22 @@ // //===----------------------------------------------------------------------===// +#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__ +#define CODEGEN_ASMPRINTER_DIEHASH_H__ + #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/MD5.h" namespace llvm { +class AsmPrinter; class CompileUnit; /// \brief An object containing the capability of hashing and adding hash /// attributes onto a DIE. class DIEHash { + // The entry for a particular attribute. struct AttrEntry { const DIEValue *Val; @@ -84,6 +89,8 @@ class DIEHash { }; public: + DIEHash(AsmPrinter *A = NULL) : AP(A) {} + /// \brief Computes the ODR signature. uint64_t computeDIEODRSignature(const DIE &Die); @@ -105,13 +112,17 @@ private: void computeHash(const DIE &Die); // Routines that add DIEValues to the hash. -private: +public: + /// \brief Adds \param Value to the hash. + void update(uint8_t Value) { Hash.update(Value); } + /// \brief Encodes and adds \param Value to the hash as a ULEB128. void addULEB128(uint64_t Value); /// \brief Encodes and adds \param Value to the hash as a SLEB128. void addSLEB128(int64_t Value); +private: /// \brief Adds \param Str to the hash and includes a NULL byte. void addString(StringRef Str); @@ -122,6 +133,13 @@ private: /// \brief Hashes the attributes in \param Attrs in order. void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); + /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or + /// DW_FORM_exprloc. 
+ void hashBlockData(const SmallVectorImpl<DIEValue *> &Values); + + /// \brief Hashes the contents pointed to in the .debug_loc section. + void hashLocList(const DIELocList &LocList); + /// \brief Hashes an individual attribute. void hashAttribute(AttrEntry Attr, dwarf::Tag Tag); @@ -136,12 +154,16 @@ private: StringRef Name); /// \brief Hashes a reference to a previously referenced type DIE. - void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber); + void hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber); void hashNestedType(const DIE &Die, StringRef Name); private: MD5 Hash; + AsmPrinter *AP; DenseMap<const DIE *, unsigned> Numbering; }; } + +#endif diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h new file mode 100644 index 0000000..470453f --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -0,0 +1,123 @@ +//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ +#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ +#include "llvm/IR/Constants.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCSymbol.h" + +namespace llvm { +class DwarfCompileUnit; +class MDNode; +/// \brief This struct describes location entries emitted in the .debug_loc +/// section. +class DebugLocEntry { + // Begin and end symbols for the address range that this location is valid. + const MCSymbol *Begin; + const MCSymbol *End; + + // Type of entry that this represents. + enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType EntryKind; + + union { + int64_t Int; + const ConstantFP *CFP; + const ConstantInt *CIP; + } Constants; + + // The location in the machine frame. + MachineLocation Loc; + + // The variable to which this location entry corresponds. + const MDNode *Variable; + + // The compile unit to which this location entry is referenced by. 
+ const DwarfCompileUnit *Unit; + + bool hasSameValueOrLocation(const DebugLocEntry &Next) { + if (EntryKind != Next.EntryKind) + return false; + + bool EqualValues; + switch (EntryKind) { + case E_Location: + EqualValues = Loc == Next.Loc; + break; + case E_Integer: + EqualValues = Constants.Int == Next.Constants.Int; + break; + case E_ConstantFP: + EqualValues = Constants.CFP == Next.Constants.CFP; + break; + case E_ConstantInt: + EqualValues = Constants.CIP == Next.Constants.CIP; + break; + } + + return EqualValues; + } + +public: + DebugLocEntry() : Begin(0), End(0), Variable(0), Unit(0) { + Constants.Int = 0; + } + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, + const MDNode *V, const DwarfCompileUnit *U) + : Begin(B), End(E), Loc(L), Variable(V), Unit(U) { + Constants.Int = 0; + EntryKind = E_Location; + } + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i, + const DwarfCompileUnit *U) + : Begin(B), End(E), Variable(0), Unit(U) { + Constants.Int = i; + EntryKind = E_Integer; + } + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr, + const DwarfCompileUnit *U) + : Begin(B), End(E), Variable(0), Unit(U) { + Constants.CFP = FPtr; + EntryKind = E_ConstantFP; + } + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr, + const DwarfCompileUnit *U) + : Begin(B), End(E), Variable(0), Unit(U) { + Constants.CIP = IPtr; + EntryKind = E_ConstantInt; + } + + /// \brief Attempt to merge this DebugLocEntry with Next and return + /// true if the merge was successful. Entries can be merged if they + /// share the same Loc/Constant and if Next immediately follows this + /// Entry. + bool Merge(const DebugLocEntry &Next) { + if (End == Next.Begin && hasSameValueOrLocation(Next)) { + End = Next.End; + return true; + } + return false; + } + bool isLocation() const { return EntryKind == E_Location; } + bool isInt() const { return EntryKind == E_Integer; } + bool isConstantFP() const { return EntryKind == E_ConstantFP; } + bool isConstantInt() const { return EntryKind == E_ConstantInt; } + int64_t getInt() const { return Constants.Int; } + const ConstantFP *getConstantFP() const { return Constants.CFP; } + const ConstantInt *getConstantInt() const { return Constants.CIP; } + const MDNode *getVariable() const { return Variable; } + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + const DwarfCompileUnit *getCU() const { return Unit; } + MachineLocation getLoc() const { return Loc; } +}; + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h new file mode 100644 index 0000000..7a51c7b --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DebugLocList.h @@ -0,0 +1,23 @@ +//===--- lib/CodeGen/DebugLocList.h - DWARF debug_loc list ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ +#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ + +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/SmallVector.h" +#include "DebugLocEntry.h" + +namespace llvm { +struct DebugLocList { + MCSymbol *Label; + SmallVector<DebugLocEntry, 4> List; +}; +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 689aeda..bcbb6c8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -31,7 +31,7 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) DwarfAccelTable::~DwarfAccelTable() {} -void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) { +void DwarfAccelTable::AddName(StringRef Name, const DIE *die, char Flags) { assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. @@ -172,7 +172,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), @@ -207,7 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) { // Emit the header. EmitHeader(Asm); diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 7627313..4a14497 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -24,7 +24,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include <map> #include <vector> // The dwarf accelerator tables are an indirect hash table optimized @@ -62,8 +61,7 @@ namespace llvm { class AsmPrinter; -class DIE; -class DwarfUnits; +class DwarfFile; class DwarfAccelTable { @@ -165,10 +163,10 @@ private: // HashData[hash_data_count] public: struct HashDataContents { - DIE *Die; // Offsets + const DIE *Die; // Offsets char Flags; // Specific flags to output - HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {} + HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {} #ifndef NDEBUG void print(raw_ostream &O) const { O << " Offset: " << Die->getOffset() << "\n"; @@ -216,7 +214,7 @@ private: void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfUnits *D); + void EmitData(AsmPrinter *, DwarfFile *D); // Allocator for HashData and HashDataContents. 
BumpPtrAllocator Allocator; @@ -241,9 +239,9 @@ private: public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); - void AddName(StringRef, DIE *, char = 0); + void AddName(StringRef, const DIE *, char = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); + void Emit(AsmPrinter *, MCSymbol *, DwarfFile *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 8918f3d..30312ac 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -31,7 +32,6 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -46,9 +46,9 @@ DwarfCFIException::DwarfCFIException(AsmPrinter *A) DwarfCFIException::~DwarfCFIException() {} -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void DwarfCFIException::EndModule() { +void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) Asm->OutStreamer.EmitCFISections(false, true); @@ -82,9 +82,9 @@ void DwarfCFIException::EndModule() { } } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void DwarfCFIException::BeginFunction(const MachineFunction *MF) { +void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. @@ -113,13 +113,14 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitCFIStartProc(); + Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false); // Indicate personality routine, if any. if (!shouldEmitPersonality) return; - const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + const MCSymbol *Sym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); Asm->OutStreamer.EmitDebugLabel @@ -135,9 +136,9 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) { LSDAEncoding); } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. 
/// -void DwarfCFIException::EndFunction() { +void DwarfCFIException::endFunction(const MachineFunction *) { if (!shouldEmitPersonality && !shouldEmitMoves) return; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 24e2c05..11345eb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -12,23 +12,25 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dwarfdebug" +#include "ByteStreamer.h" #include "DwarfDebug.h" #include "DIE.h" #include "DIEHash.h" #include "DwarfAccelTable.h" -#include "DwarfCompileUnit.h" +#include "DwarfUnit.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" @@ -38,10 +40,10 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -59,26 +61,17 @@ static cl::opt<bool> UnknownLocations( cl::init(false)); static cl::opt<bool> -GenerateODRHash("generate-odr-hash", cl::Hidden, - cl::desc("Add an ODR hash to external type DIEs."), - cl::init(false)); - -static cl::opt<bool> -GenerateCUHash("generate-cu-hash", cl::Hidden, - cl::desc("Add the CU hash as the dwo_id."), - cl::init(false)); - -static cl::opt<bool> GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, cl::desc("Generate GNU-style pubnames and pubtypes"), cl::init(false)); +static cl::opt<bool> GenerateARangeSection("generate-arange-section", + cl::Hidden, + cl::desc("Generate dwarf aranges"), + cl::init(false)); + namespace { -enum DefaultOnOff { - Default, - Enable, - Disable -}; +enum DefaultOnOff { Default, Enable, Disable }; } static cl::opt<DefaultOnOff> @@ -91,7 +84,7 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), + cl::desc("Output DWARF5 split debug info."), cl::values(clEnumVal(Default, "Default for platform"), clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled"), clEnumValEnd), @@ -105,29 +98,34 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); +static cl::opt<unsigned> +DwarfVersionNumber("dwarf-version", cl::Hidden, + cl::desc("Generate DWARF for dwarf version."), cl::init(0)); + static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// -// Configuration values for initial hash set sizes (log2). 
-// -static const unsigned InitAbbreviationsSetSize = 9; // log2(512) - namespace llvm { /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. -template <typename T> -T DbgVariable::resolve(DIRef<T> Ref) const { +template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const { return DD->resolve(Ref); } +bool DbgVariable::isBlockByrefVariable() const { + assert(Var.isVariable() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); +} + + DIType DbgVariable::getType() const { - DIType Ty = Var.getType(); + DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. - if (Var.isBlockByrefVariable()) { + if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) { /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -179,22 +177,18 @@ static unsigned getDwarfVersionFromModule(const Module *M) { } DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), FirstCU(0), - AbbreviationsSet(InitAbbreviationsSetSize), - SourceIdMap(DIEValueAllocator), - PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", - DIEValueAllocator), - SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", - DIEValueAllocator) { - - DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; - DwarfStrSectionSym = TextSectionSym = 0; + : Asm(A), MMI(Asm->MMI), FirstCU(0), PrevLabel(NULL), GlobalRangeCount(0), + InfoHolder(A, "info_string", DIEValueAllocator), + UsedNonDefaultText(false), + SkeletonHolder(A, "skel_string", DIEValueAllocator) { + + DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; DwarfAddrSectionSym = 0; DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + CurFn = 0; + CurMI = 0; // Turn on accelerator tables for Darwin by default, pubnames by // default for non-Darwin, and handle split dwarf. @@ -215,7 +209,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; - DwarfVersion = getDwarfVersionFromModule(MMI->getModule()); + DwarfVersion = DwarfVersionNumber + ? 
DwarfVersionNumber + : getDwarfVersionFromModule(MMI->getModule()); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -228,53 +224,57 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, const char *SymbolStem = 0) { Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; + if (!SymbolStem) + return 0; MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); Asm->OutStreamer.EmitLabel(TmpSym); return TmpSym; } -MCSymbol *DwarfUnits::getStringPoolSym() { +DwarfFile::~DwarfFile() { + for (DwarfUnit *DU : CUs) + delete DU; +} + +MCSymbol *DwarfFile::getStringPoolSym() { return Asm->GetTempSymbol(StringPref); } -MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) { - std::pair<MCSymbol*, unsigned> &Entry = - StringPool.GetOrCreateValue(Str).getValue(); - if (Entry.first) return Entry.first; +MCSymbol *DwarfFile::getStringPoolEntry(StringRef Str) { + std::pair<MCSymbol *, unsigned> &Entry = + StringPool.GetOrCreateValue(Str).getValue(); + if (Entry.first) + return Entry.first; Entry.second = NextStringPoolNumber++; return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); } -unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { - std::pair<MCSymbol*, unsigned> &Entry = - StringPool.GetOrCreateValue(Str).getValue(); - if (Entry.first) return Entry.second; +unsigned DwarfFile::getStringPoolIndex(StringRef Str) { + std::pair<MCSymbol *, unsigned> &Entry = + StringPool.GetOrCreateValue(Str).getValue(); + if (Entry.first) + return Entry.second; Entry.second = NextStringPoolNumber++; Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { - return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); -} - -unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { - std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P = - AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); +unsigned DwarfFile::getAddrPoolIndex(const MCSymbol *Sym, bool TLS) { + std::pair<AddrPool::iterator, bool> P = AddressPool.insert( + std::make_pair(Sym, AddressPoolEntry(NextAddrPoolNumber, TLS))); if (P.second) ++NextAddrPoolNumber; - return P.first->second; + return P.first->second.Number; } // Define a unique number for the abbreviation. // -void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { +void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) { // Check the set for priors. - DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); + DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev); // If it's newly added. if (InSet == &Abbrev) { @@ -294,7 +294,8 @@ static bool isObjCClass(StringRef Name) { } static bool hasObjCCategory(StringRef Name) { - if (!isObjCClass(Name)) return false; + if (!isObjCClass(Name)) + return false; return Name.find(") ") != StringRef::npos; } @@ -318,35 +319,35 @@ static StringRef getObjCMethodName(StringRef In) { // Helper for sorting sections into a stable output order. static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? B->getLabelBeginName() : ""); - return LA < LB; + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; } // Add the various names to the Dwarf accelerator table names. 
// TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. -static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, - DIE* Die) { - if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); +static void addSubprogramNames(DwarfUnit *TheU, DISubprogram SP, DIE *Die) { + if (!SP.isDefinition()) + return; + TheU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - TheCU->addAccelName(SP.getLinkageName(), Die); + TheU->addAccelName(SP.getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. if (isObjCClass(SP.getName())) { StringRef Class, Category; getObjCClassCategory(SP.getName(), Class, Category); - TheCU->addAccelObjC(Class, Die); + TheU->addAccelObjC(Class, Die); if (Category != "") - TheCU->addAccelObjC(Category, Die); + TheU->addAccelObjC(Category, Die); // Also add the base method name to the name table. - TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + TheU->addAccelName(getObjCMethodName(SP.getName()), Die); } } @@ -366,7 +367,8 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) { // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { +DIE *DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, + DISubprogram SP) { DIE *SPDie = SPCU->getDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); @@ -376,7 +378,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { // concrete DIE twice. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { // Pick up abstract subprogram DIE. - SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPDie = + SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getUnitDie()); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); @@ -388,8 +391,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { // specification DIE for a function defined inside a function. DIScope SPContext = resolve(SP.getContext()); if (SP.isDefinition() && !SPContext.isCompileUnit() && - !SPContext.isFile() && - !isSubprogramContext(SPContext)) { + !SPContext.isFile() && !isSubprogramContext(SPContext)) { SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. 
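As an aside on the addSubprogramNames hunk above: for an Objective-C selector such as "-[NSString(MyAdditions) reversedString:]" the accelerator tables get extra entries so that the class, the category, and the bare method name can each be looked up directly, in addition to the plain and linkage names. A minimal standalone sketch of that kind of name splitting follows; it uses plain std::string and a hypothetical splitObjCName helper rather than the StringRef-based isObjCClass/getObjCClassCategory/getObjCMethodName helpers shown in the hunk, so details such as the exact string stored for the category entry are illustrative only.

#include <cstddef>
#include <iostream>
#include <string>

// Pieces of an Objective-C selector name that would each get their own
// accelerator-table entry. Hypothetical sketch, not the LLVM helpers.
struct ObjCNameParts {
  std::string Class;
  std::string Category; // empty when the selector has no category
  std::string Method;
};

static bool looksLikeObjCSelector(const std::string &Name) {
  return Name.size() > 2 && (Name[0] == '-' || Name[0] == '+') &&
         Name[1] == '[';
}

static ObjCNameParts splitObjCName(const std::string &Name) {
  ObjCNameParts P;
  if (!looksLikeObjCSelector(Name))
    return P;
  std::size_t Open = Name.find('[');
  std::size_t Space = Name.find(' ');
  std::size_t Close = Name.rfind(']');
  if (Space == std::string::npos || Close == std::string::npos)
    return P;
  // "Class(Category)" sits between '[' and the space before the method name.
  std::string ClassAndCategory = Name.substr(Open + 1, Space - Open - 1);
  std::size_t Paren = ClassAndCategory.find('(');
  if (Paren != std::string::npos) {
    P.Class = ClassAndCategory.substr(0, Paren);
    P.Category =
        ClassAndCategory.substr(Paren + 1, ClassAndCategory.size() - Paren - 2);
  } else {
    P.Class = ClassAndCategory;
  }
  P.Method = Name.substr(Space + 1, Close - Space - 1);
  return P;
}

int main() {
  ObjCNameParts P = splitObjCName("-[NSString(MyAdditions) reversedString:]");
  // Prints: NSString | MyAdditions | reversedString:
  std::cout << P.Class << " | " << P.Category << " | " << P.Method << "\n";
  return 0;
}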
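Similarly, the DIEHash changes earlier in this diff (hashAttribute, addULEB128, addSLEB128, and the new public update(uint8_t) hook) work by feeding attribute markers, forms, and values into an MD5 hash as LEB128-encoded bytes. A minimal sketch of the ULEB128 encoding those helpers rely on is below; the writeULEB128 name and the byte-sink callback are illustrative stand-ins for however the bytes are consumed (in DIEHash each byte goes to Hash.update), not the llvm/Support/LEB128.h API.

#include <cstdint>
#include <cstdio>
#include <functional>
#include <vector>

// Encode Value as ULEB128, handing each byte to a sink as it is produced.
// Hypothetical helper for illustration only.
static void writeULEB128(uint64_t Value,
                         const std::function<void(uint8_t)> &Sink) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // high bit set: more bytes follow
    Sink(Byte);
  } while (Value != 0);
}

int main() {
  std::vector<uint8_t> Bytes;
  writeULEB128(624485, [&](uint8_t B) { Bytes.push_back(B); });
  // 624485 encodes as e5 8e 26, the classic LEB128 worked example.
  for (uint8_t B : Bytes)
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}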
@@ -397,30 +399,17 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { DIArray Args = SPTy.getTypeArray(); uint16_t SPTag = SPTy.getTag(); if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = - SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); - DIType ATy(Args.getElement(i)); - SPCU->addType(Arg, ATy); - if (ATy.isArtificial()) - SPCU->addFlag(Arg, dwarf::DW_AT_artificial); - if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); - } + SPCU->constructSubprogramArguments(*SPDie, Args); DIE *SPDeclDie = SPDie; - SPDie = - SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, + *SPCU->getUnitDie()); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); } } } - SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, - Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); - SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, - Asm->GetTempSymbol("func_end", - Asm->getFunctionNumber())); + attachLowHighPC(SPCU, SPDie, FunctionBeginSym, FunctionEndSym); + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); @@ -453,9 +442,43 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { return !End; } +static void addSectionLabel(AsmPrinter *Asm, DwarfUnit *U, DIE *D, + dwarf::Attribute A, const MCSymbol *L, + const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + U->addSectionLabel(D, A, L); + else + U->addSectionDelta(D, A, L, Sec); +} + +void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE, + const SmallVectorImpl<InsnRange> &Range) { + // Emit offset in .debug_range as a relocatable label. emitDIE will handle + // emitting it appropriately. + MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++); + + // Under fission, ranges are specified by constant offsets relative to the + // CU's DW_AT_GNU_ranges_base. + if (useSplitDwarf()) + TheCU->addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym, + DwarfDebugRangeSectionSym); + else + addSectionLabel(Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym, + DwarfDebugRangeSectionSym); + + RangeSpanList List(RangeSym); + for (const InsnRange &R : Range) { + RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second)); + List.addRange(std::move(Span)); + } + + // Add the range list to the set of ranges to be emitted. + TheCU->addRangeList(std::move(List)); +} + // Construct new DW_TAG_lexical_block for this scope and attach // DW_AT_low_pc/DW_AT_high_pc labels. -DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, +DIE *DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope) { if (isLexicalScopeDIENull(Scope)) return 0; @@ -464,29 +487,16 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); - // If we have multiple ranges, emit them into the range section. - if (Ranges.size() > 1) { - // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. emitDIE will handle - // DW_AT_ranges appropriately. 
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); - DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); - } + const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges(); - // Terminate the range list. - DebugRangeSymbols.push_back(NULL); - DebugRangeSymbols.push_back(NULL); + // If we have multiple ranges, emit them into the range section. + if (ScopeRanges.size() > 1) { + addScopeRangeList(TheCU, ScopeDIE, ScopeRanges); return ScopeDIE; } // Construct the address range for this DIE. - SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); assert(End && "End label should not be null!"); @@ -494,18 +504,17 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start); - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End); + attachLowHighPC(TheCU, ScopeDIE, Start, End); return ScopeDIE; } // This scope represents inlined body of a function. Construct DIE to // represent this concrete inlined copy of the function. -DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, +DIE *DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope) { - const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); - assert(Ranges.empty() == false && + const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges(); + assert(!ScopeRanges.empty() && "LexicalScope does not have instruction markers!"); if (!Scope->getScopeNode()) @@ -521,22 +530,11 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); - if (Ranges.size() > 1) { - // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. emitDIE will handle - // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); - DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); - } - DebugRangeSymbols.push_back(NULL); - DebugRangeSymbols.push_back(NULL); - } else { - SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); + // If we have multiple ranges, emit them into the range section. 
+ if (ScopeRanges.size() > 1) + addScopeRangeList(TheCU, ScopeDIE, ScopeRanges); + else { + SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin(); MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); MCSymbol *EndLabel = getLabelAfterInsn(RI->second); @@ -547,17 +545,16 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, "Invalid starting label for an inlined scope!"); assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); - TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); + attachLowHighPC(TheCU, ScopeDIE, StartLabel, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); // Add the call site information to the DIE. DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), - TheCU->getUniqueID())); + TheCU->addUInt( + ScopeDIE, dwarf::DW_AT_call_file, None, + TheCU->getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed @@ -567,37 +564,49 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } -DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl<DIE*> &Children) { - DIE *ObjectPointer = NULL; +DIE *DwarfDebug::createScopeChildrenDIE(DwarfCompileUnit *TheCU, + LexicalScope *Scope, + SmallVectorImpl<DIE *> &Children) { + DIE *ObjectPointer = NULL; // Collect arguments for current function. - if (LScopes.isCurrentFunctionScope(Scope)) - for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) - if (DbgVariable *ArgDV = CurrentFnArguments[i]) + if (LScopes.isCurrentFunctionScope(Scope)) { + for (DbgVariable *ArgDV : CurrentFnArguments) + if (ArgDV) if (DIE *Arg = - TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); - if (ArgDV->isObjectPointer()) ObjectPointer = Arg; + if (ArgDV->isObjectPointer()) + ObjectPointer = Arg; } + // If this is a variadic function, add an unspecified parameter. + DISubprogram SP(Scope->getScopeNode()); + DIArray FnArgs = SP.getType().getTypeArray(); + if (FnArgs.getElement(FnArgs.getNumElements() - 1) + .isUnspecifiedParameter()) { + DIE *Ellipsis = new DIE(dwarf::DW_TAG_unspecified_parameters); + Children.push_back(Ellipsis); + } + } + // Collect lexical scope children first. - const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) - if (DIE *Variable = - TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { + for (DbgVariable *DV : ScopeVariables.lookup(Scope)) + if (DIE *Variable = TheCU->constructVariableDIE(*DV, + Scope->isAbstractScope())) { Children.push_back(Variable); - if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; + if (DV->isObjectPointer()) + ObjectPointer = Variable; } - const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren(); - for (unsigned j = 0, M = Scopes.size(); j < M; ++j) - if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) + for (LexicalScope *LS : Scope->getChildren()) + if (DIE *Nested = constructScopeDIE(TheCU, LS)) Children.push_back(Nested); return ObjectPointer; } // Construct a DIE for this scope. 
-DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { +DIE *DwarfDebug::constructScopeDIE(DwarfCompileUnit *TheCU, + LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; @@ -634,10 +643,12 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // There is no need to emit empty lexical block DIE. std::pair<ImportedEntityMap::const_iterator, - ImportedEntityMap::const_iterator> Range = std::equal_range( - ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), - std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), - less_first()); + ImportedEntityMap::const_iterator> Range = + std::equal_range( + ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), + std::pair<const MDNode *, const MDNode *>(DS, (const MDNode *)0), + less_first()); if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); @@ -657,146 +668,55 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); // Add children - for (SmallVectorImpl<DIE *>::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) - ScopeDIE->addChild(*I); + for (DIE *I : Children) + ScopeDIE->addChild(I); if (DS.isSubprogram() && ObjectPointer != NULL) TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); - if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); - return ScopeDIE; } -// Look up the source id with the given directory and source file names. -// If none currently exists, create a new id and insert it in the -// SourceIds map. This can update DirectoryNames and SourceFileNames maps -// as well. -unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, - StringRef DirName, unsigned CUID) { - // If we use .loc in assembly, we can't separate .file entries according to - // compile units. Thus all files will belong to the default compile unit. - - // FIXME: add a better feature test than hasRawTextSupport. Even better, - // extend .file to support this. - if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) - CUID = 0; - - // If FE did not provide a file name, then assume stdin. - if (FileName.empty()) - return getOrCreateSourceID("<stdin>", StringRef(), CUID); - - // TODO: this might not belong here. See if we can factor this better. - if (DirName == CompilationDir) - DirName = ""; - - // FileIDCUMap stores the current ID for the given compile unit. - unsigned SrcId = FileIDCUMap[CUID] + 1; - - // We look up the CUID/file/dir by concatenating them with a zero byte. - SmallString<128> NamePair; - NamePair += utostr(CUID); - NamePair += '\0'; - NamePair += DirName; - NamePair += '\0'; // Zero bytes are not allowed in paths. - NamePair += FileName; - - StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); - if (Ent.getValue() != SrcId) - return Ent.getValue(); - - FileIDCUMap[CUID] = SrcId; - // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID); +void DwarfDebug::addGnuPubAttributes(DwarfUnit *U, DIE *D) const { + if (!GenerateGnuPubSections) + return; - return SrcId; + U->addFlag(D, dwarf::DW_AT_GNU_pubnames); } -// Create new CompileUnit for the given metadata node with tag +// Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
-CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { +DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, - this, &InfoHolder); + DwarfCompileUnit *NewCU = new DwarfCompileUnit( + InfoHolder.getUnits().size(), Die, DIUnit, Asm, this, &InfoHolder); + InfoHolder.addUnit(NewCU); - FileIDCUMap[NewCU->getUniqueID()] = 0; - // Call this to emit a .file directive if it wasn't emitted for the source - // file this CU comes from yet. - getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID()); + // LTO with assembly output shares a single line table amongst multiple CUs. + // To avoid the compilation directory being ambiguous, let the line table + // explicitly describe the directory of all files, never relying on the + // compilation directory. + if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU) + Asm->OutStreamer.getContext().setMCLineTableCompilationDir( + NewCU->getUniqueID(), CompilationDir); NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); - // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0 (or a NULL label) for this. For - // split dwarf it's in the skeleton CU so omit it here. - if (!useSplitDwarf()) - NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); - - // Define start line table label for each Compile Unit. - MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", - NewCU->getUniqueID()); - Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, - NewCU->getUniqueID()); - - // Use a single line table if we are using .loc and generating assembly. - bool UseTheFirstCU = - (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) || - (NewCU->getUniqueID() == 0); - if (!useSplitDwarf()) { - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - UseTheFirstCU ? Asm->GetTempSymbol("section_line") - : LineTableStartSym); - else if (UseTheFirstCU) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + NewCU->initStmtList(DwarfLineSectionSym); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - // Flags to let the linker know we have emitted new style pubnames. Only - // emit it here if we don't have a skeleton CU for split dwarf. 
- if (GenerateGnuPubSections) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, - dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("gnu_pubnames", - NewCU->getUniqueID())); - else - NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("gnu_pubnames", - NewCU->getUniqueID()), - DwarfGnuPubNamesSectionSym); - - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, - dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("gnu_pubtypes", - NewCU->getUniqueID())); - else - NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("gnu_pubtypes", - NewCU->getUniqueID()), - DwarfGnuPubTypesSectionSym); - } + addGnuPubAttributes(NewCU, Die); } if (DIUnit.isOptimized()) @@ -808,12 +728,18 @@ CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, - dwarf::DW_FORM_data1, RVer); + dwarf::DW_FORM_data1, RVer); if (!FirstCU) FirstCU = NewCU; - InfoHolder.addUnit(NewCU); + if (useSplitDwarf()) { + NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), + DwarfInfoDWOSectionSym); + NewCU->setSkeleton(constructSkeletonCU(NewCU)); + } else + NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(), + DwarfInfoSectionSym); CUMap.insert(std::make_pair(DIUnit, NewCU)); CUDieMap.insert(std::make_pair(Die, NewCU)); @@ -821,12 +747,13 @@ CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { } // Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(DwarfCompileUnit *TheCU, + const MDNode *N) { // FIXME: We should only call this routine once, however, during LTO if a // program is defined in multiple CUs we could end up calling it out of // beginModule as we walk the CUs. 
- CompileUnit *&CURef = SPMap[N]; + DwarfCompileUnit *&CURef = SPMap[N]; if (CURef) return; CURef = TheCU; @@ -843,24 +770,22 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N) { DIImportedEntity Module(N); - if (!Module.Verify()) - return; + assert(Module.Verify()); if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) constructImportedEntityDIE(TheCU, Module, D); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, - DIE *Context) { +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU, + const MDNode *N, DIE *Context) { DIImportedEntity Module(N); - if (!Module.Verify()) - return; + assert(Module.Verify()); return constructImportedEntityDIE(TheCU, Module, Context); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU, const DIImportedEntity &Module, DIE *Context) { assert(Module.Verify() && @@ -869,7 +794,7 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, DIE *IMDie = new DIE(Module.getTag()); TheCU->insertDIE(Module, IMDie); DIE *EntityDie; - DIDescriptor Entity = Module.getEntity(); + DIDescriptor Entity = resolve(Module.getEntity()); if (Entity.isNameSpace()) EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity)); else if (Entity.isSubprogram()) @@ -878,11 +803,9 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity)); else EntityDie = TheCU->getDIE(Entity); - unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), - Module.getContext().getDirectory(), - TheCU->getUniqueID()); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber()); + TheCU->addSourceLine(IMDie, Module.getLineNumber(), + Module.getContext().getFilename(), + Module.getContext().getDirectory()); TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie); StringRef Name = Module.getName(); if (!Name.empty()) @@ -909,9 +832,11 @@ void DwarfDebug::beginModule() { // Emit initial sections so we can reference labels later. emitSectionLabels(); - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CUNode(CU_Nodes->getOperand(i)); - CompileUnit *CU = constructCompileUnit(CUNode); + SingleCU = CU_Nodes->getNumOperands() == 1; + + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit CUNode(N); + DwarfCompileUnit *CU = constructDwarfCompileUnit(CUNode); DIArray ImportedEntities = CUNode.getImportedEntities(); for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) ScopesWithImportedEntities.push_back(std::make_pair( @@ -929,8 +854,13 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { + DIType Ty(RetainedTypes.getElement(i)); + // The retained types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. 
+ DIType UniqueTy(resolve(Ty.getRef())); + CU->getOrCreateTypeDIE(UniqueTy); + } // Emit imported_modules last so that the relevant context is already // available. for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) @@ -947,14 +877,11 @@ void DwarfDebug::beginModule() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. void DwarfDebug::computeInlinedDIEs() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. - for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), - AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { - DIE *ISP = *AI; + for (DIE *ISP : InlinedSubprogramDIEs) FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - } - for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(), - AE = AbstractSPDies.end(); AI != AE; ++AI) { - DIE *ISP = AI->second; + + for (const auto &AI : AbstractSPDies) { + DIE *ISP = AI.second; if (InlinedSubprogramDIEs.count(ISP)) continue; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); @@ -966,8 +893,8 @@ void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit TheCU(CU_Nodes->getOperand(i)); + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit TheCU(N); DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); @@ -982,7 +909,8 @@ void DwarfDebug::collectDeadVariables() { continue; // Construct subprogram DIE and add variables DIEs. - CompileUnit *SPCU = CUMap.lookup(TheCU); + DwarfCompileUnit *SPCU = + static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); assert(SPCU && "Unable to find Compile Unit!"); // FIXME: See the comment in constructSubprogramDIE about duplicate // subprogram DIEs. @@ -993,8 +921,7 @@ void DwarfDebug::collectDeadVariables() { if (!DV.isVariable()) continue; DbgVariable NewVar(DV, NULL, this); - if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, false)) + if (DIE *VariableDIE = SPCU->constructVariableDIE(NewVar, false)) SPDIE->addChild(VariableDIE); } } @@ -1002,41 +929,6 @@ void DwarfDebug::collectDeadVariables() { } } -// Type Signature [7.27] and ODR Hash code. - -/// \brief Grabs the string in whichever attribute is passed in and returns -/// a reference to it. Returns "" if the attribute doesn't exist. -static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { - DIEValue *V = Die->findAttribute(Attr); - - if (DIEString *S = dyn_cast_or_null<DIEString>(V)) - return S->getString(); - - return StringRef(""); -} - -/// Return true if the current DIE is contained within an anonymous namespace. -static bool isContainedInAnonNamespace(DIE *Die) { - DIE *Parent = Die->getParent(); - - while (Parent) { - if (Parent->getTag() == dwarf::DW_TAG_namespace && - getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") - return true; - Parent = Parent->getParent(); - } - - return false; -} - -/// Test if the current CU language is C++ and that we have -/// a named type that is not contained in an anonymous namespace. -static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { - return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && - getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && - !isContainedInAnonNamespace(Die); -} - void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. 
collectDeadVariables(); @@ -1044,47 +936,67 @@ void DwarfDebug::finalizeModuleInfo() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. computeInlinedDIEs(); - // Split out type units and conditionally add an ODR tag to the split - // out type. - // FIXME: Do type splitting. - for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { - DIE *Die = TypeUnits[i]; - DIEHash Hash; - // If we've requested ODR hashes and it's applicable for an ODR hash then - // add the ODR signature now. - // FIXME: This should be added onto the type unit, not the type, but this - // works as an intermediate stage. - if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) - CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, - dwarf::DW_FORM_data8, - Hash.computeDIEODRSignature(*Die)); - } - - // Handle anything that needs to be done on a per-cu basis. - for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); - CUI != CUE; ++CUI) { - CompileUnit *TheCU = CUI->second; + // Handle anything that needs to be done on a per-unit basis after + // all other generation. + for (DwarfUnit *TheU : getUnits()) { // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. - TheCU->constructContainingTypeDIEs(); - - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. - if (useSplitDwarf()) { - uint64_t ID = 0; - if (GenerateCUHash) { - DIEHash CUHash; - ID = CUHash.computeCUSignature(*TheCU->getCUDie()); + TheU->constructContainingTypeDIEs(); + + // Add CU specific attributes if we need to add any. + if (TheU->getUnitDie()->getTag() == dwarf::DW_TAG_compile_unit) { + // If we're splitting the dwarf out now that we've got the entire + // CU then add the dwo id to it. + DwarfCompileUnit *SkCU = + static_cast<DwarfCompileUnit *>(TheU->getSkeleton()); + if (useSplitDwarf()) { + // Emit a unique identifier for this CU. + uint64_t ID = DIEHash(Asm).computeCUSignature(*TheU->getUnitDie()); + TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. + if (!InfoHolder.getAddrPool()->empty()) + addSectionLabel(Asm, SkCU, SkCU->getUnitDie(), + dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym, + DwarfAddrSectionSym); + if (!TheU->getRangeLists().empty()) + addSectionLabel(Asm, SkCU, SkCU->getUnitDie(), + dwarf::DW_AT_GNU_ranges_base, + DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym); + } + + // If we have code split among multiple sections or non-contiguous + // ranges of code then emit a DW_AT_ranges attribute on the unit that will + // remain in the .o file, otherwise add a DW_AT_low_pc. + // FIXME: We should use ranges allow reordering of code ala + // .subsections_via_symbols in mach-o. This would mean turning on + // ranges for all subprogram DIEs for mach-o. + DwarfCompileUnit *U = SkCU ? 
SkCU : static_cast<DwarfCompileUnit *>(TheU); + unsigned NumRanges = TheU->getRanges().size(); + if (NumRanges) { + if (NumRanges > 1) { + addSectionLabel(Asm, U, U->getUnitDie(), dwarf::DW_AT_ranges, + Asm->GetTempSymbol("cu_ranges", U->getUniqueID()), + DwarfDebugRangeSectionSym); + + // A DW_AT_low_pc attribute may also be specified in combination with + // DW_AT_ranges to specify the default base address for use in + // location lists (see Section 2.6.2) and range lists (see Section + // 2.17.3). + U->addUInt(U->getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + 0); + } else { + RangeSpan &Range = TheU->getRanges().back(); + U->addLocalLabelAddress(U->getUnitDie(), dwarf::DW_AT_low_pc, + Range.getStart()); + U->addLabelDelta(U->getUnitDie(), dwarf::DW_AT_high_pc, + Range.getEnd(), Range.getStart()); + } } - // This should be a unique identifier when we want to build .dwp files. - TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - // Now construct the skeleton CU associated. - CompileUnit *SkCU = constructSkeletonCU(TheCU); - // This should be a unique identifier when we want to build .dwp files. - SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); } } @@ -1095,9 +1007,8 @@ void DwarfDebug::finalizeModuleInfo() { } void DwarfDebug::endSections() { - // Filter labels by section. - for (size_t n = 0; n < ArangeLabels.size(); n++) { - const SymbolCU &SCU = ArangeLabels[n]; + // Filter labels by section. + for (const SymbolCU &SCU : ArangeLabels) { if (SCU.Sym->isInSection()) { // Make a note of this symbol and it's section. const MCSection *Section = &SCU.Sym->getSection(); @@ -1113,9 +1024,8 @@ void DwarfDebug::endSections() { // Build a list of sections used. std::vector<const MCSection *> Sections; - for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); - it++) { - const MCSection *Section = it->first; + for (const auto &it : SectionMap) { + const MCSection *Section = it.first; Sections.push_back(Section); } @@ -1124,15 +1034,15 @@ void DwarfDebug::endSections() { std::sort(Sections.begin(), Sections.end(), SectionSort); // Add terminating symbols for each section. - for (unsigned ID=0;ID<Sections.size();ID++) { + for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { const MCSection *Section = Sections[ID]; MCSymbol *Sym = NULL; if (Section) { // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the resulting - // label may not be valid to use as a label. (section names can use a greater - // set of characters on some systems) + // if we know the section name up-front. For user-created sections, the + // resulting label may not be valid to use as a label. (section names can + // use a greater set of characters on some systems) Sym = Asm->GetTempSymbol("debug_end", ID); Asm->OutStreamer.SwitchSection(Section); Asm->OutStreamer.EmitLabel(Sym); @@ -1145,8 +1055,11 @@ void DwarfDebug::endSections() { // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { + assert(CurFn == 0); + assert(CurMI == 0); - if (!FirstCU) return; + if (!FirstCU) + return; // End any existing sections. // TODO: Does this need to happen? @@ -1155,58 +1068,32 @@ void DwarfDebug::endModule() { // Finalize the debug info for the module. finalizeModuleInfo(); - if (!useSplitDwarf()) { - emitDebugStr(); + emitDebugStr(); - // Emit all the DIEs into a debug info section. 
- emitDebugInfo(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); - - // Emit info into a debug loc section. - emitDebugLoc(); + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); - // Emit info into a debug aranges section. + // Emit info into a debug aranges section. + if (GenerateARangeSection) emitDebugARanges(); - // Emit info into a debug ranges section. - emitDebugRanges(); - - // Emit info into a debug macinfo section. - emitDebugMacInfo(); + // Emit info into a debug ranges section. + emitDebugRanges(); - } else { - // TODO: Fill this in for separated debug sections and separate - // out information into new sections. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); - - // Emit the debug info section and compile units. - emitDebugInfo(); + if (useSplitDwarf()) { + emitDebugStrDWO(); emitDebugInfoDWO(); - - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); emitDebugAbbrevDWO(); - - // Emit info into a debug loc section. - emitDebugLoc(); - - // Emit info into a debug aranges section. - emitDebugARanges(); - - // Emit info into a debug ranges section. - emitDebugRanges(); - - // Emit info into a debug macinfo section. - emitDebugMacInfo(); - + emitDebugLineDWO(); // Emit DWO addresses. InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); - - } + emitDebugLocDWO(); + } else + // Emit info into a debug loc section. + emitDebugLoc(); // Emit info into the dwarf accelerator table sections. if (useDwarfAccelTables()) { @@ -1224,13 +1111,6 @@ void DwarfDebug::endModule() { // clean up. SPMap.clear(); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) - delete I->second; - - for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(), - E = SkeletonCUs.end(); I != E; ++I) - delete *I; // Reset these for the next Module if we have one. FirstCU = NULL; @@ -1257,8 +1137,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, } // If Var is a current function argument then add it to CurrentFnArguments list. -bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, - DbgVariable *Var, LexicalScope *Scope) { +bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) { if (!LScopes.isCurrentFunctionScope(Scope)) return false; DIVariable DV = Var->getVariable(); @@ -1270,7 +1149,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, size_t Size = CurrentFnArguments.size(); if (Size == 0) - CurrentFnArguments.resize(MF->getFunction()->arg_size()); + CurrentFnArguments.resize(CurFn->getFunction()->arg_size()); // llvm::Function argument size is not good indicator of how many // arguments does the function have at source level. if (ArgNo > Size) @@ -1280,31 +1159,26 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, } // Collect variable information from side table maintained by MMI. 
-void -DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, - SmallPtrSet<const MDNode *, 16> &Processed) { - MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); - for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), - VE = VMap.end(); VI != VE; ++VI) { - const MDNode *Var = VI->first; - if (!Var) continue; - Processed.insert(Var); - DIVariable DV(Var); - const std::pair<unsigned, DebugLoc> &VP = VI->second; - - LexicalScope *Scope = LScopes.findLexicalScope(VP.second); +void DwarfDebug::collectVariableInfoFromMMITable( + SmallPtrSet<const MDNode *, 16> &Processed) { + for (const auto &VI : MMI->getVariableDbgInfo()) { + if (!VI.Var) + continue; + Processed.insert(VI.Var); + DIVariable DV(VI.Var); + LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (Scope == 0) continue; - DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); + DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc); DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); - RegVar->setFrameIndex(VP.first); - if (!addCurrentFnArgument(MF, RegVar, Scope)) + RegVar->setFrameIndex(VI.Slot); + if (!addCurrentFnArgument(RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsDbgVariable) - AbsDbgVariable->setFrameIndex(VP.first); + AbsDbgVariable->setFrameIndex(VI.Slot); } } @@ -1312,18 +1186,19 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, // defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - return MI->getNumOperands() == 3 && - MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && + return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() && (MI->getOperand(1).isImm() || (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } // Get .debug_loc entry for the instruction range starting at MI. -static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, - const MCSymbol *FLabel, - const MCSymbol *SLabel, - const MachineInstr *MI) { - const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); +static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm, + const MCSymbol *FLabel, + const MCSymbol *SLabel, + const MachineInstr *MI, + DwarfCompileUnit *Unit) { + const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); assert(MI->getNumOperands() == 3); if (MI->getOperand(0).isReg()) { @@ -1334,36 +1209,32 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); + return DebugLocEntry(FLabel, SLabel, MLoc, Var, Unit); } if (MI->getOperand(0).isImm()) - return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm()); + return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm(), Unit); if (MI->getOperand(0).isFPImm()) - return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm()); + return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm(), Unit); if (MI->getOperand(0).isCImm()) - return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); + return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm(), Unit); llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); } // Find variables for each lexical scope. 
void -DwarfDebug::collectVariableInfo(const MachineFunction *MF, - SmallPtrSet<const MDNode *, 16> &Processed) { +DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(MF, Processed); + collectVariableInfoFromMMITable(Processed); - for (SmallVectorImpl<const MDNode*>::const_iterator - UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE; - ++UVI) { - const MDNode *Var = *UVI; + for (const MDNode *Var : UserVariables) { if (Processed.count(Var)) continue; // History contains relevant DBG_VALUE instructions for Var and instructions // clobbering it. - SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var]; if (History.empty()) continue; const MachineInstr *MInsn = History.front(); @@ -1371,7 +1242,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIVariable DV(Var); LexicalScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(DV.getContext()).describes(MF->getFunction())) + DISubprogram(DV.getContext()).describes(CurFn->getFunction())) Scope = LScopes.getCurrentFunctionScope(); else if (MDNode *IA = DV.getInlinedAt()) Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); @@ -1385,14 +1256,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); - if (!addCurrentFnArgument(MF, RegVar, Scope)) + if (!addCurrentFnArgument(RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsVar) AbsVar->setMInsn(MInsn); // Simplify ranges that are fully coalesced. - if (History.size() <= 1 || (History.size() == 2 && - MInsn->isIdenticalTo(History.back()))) { + if (History.size() <= 1 || + (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) { RegVar->setMInsn(MInsn); continue; } @@ -1400,14 +1271,21 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, // Handle multiple DBG_VALUE instructions describing one variable. RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - for (SmallVectorImpl<const MachineInstr*>::const_iterator - HI = History.begin(), HE = History.end(); HI != HE; ++HI) { + DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); + DebugLocList &LocList = DotDebugLocEntries.back(); + LocList.Label = + Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); + SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List; + for (SmallVectorImpl<const MachineInstr *>::const_iterator + HI = History.begin(), + HE = History.end(); + HI != HE; ++HI) { const MachineInstr *Begin = *HI; assert(Begin->isDebugValue() && "Invalid History entry"); // Check if DBG_VALUE is truncating a range. - if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() - && !Begin->getOperand(0).getReg()) + if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && + !Begin->getOperand(0).getReg()) continue; // Compute the range for a register location. 
@@ -1421,7 +1299,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, else { const MachineInstr *End = HI[1]; DEBUG(dbgs() << "DotDebugLoc Pair:\n" - << "\t" << *Begin << "\t" << *End << "\n"); + << "\t" << *Begin << "\t" << *End << "\n"); if (End->isDebugValue()) SLabel = getLabelBeforeInsn(End); else { @@ -1433,10 +1311,12 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. - DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, - Begin)); + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + DebugLocEntry Loc = getDebugLocEntry(Asm, FLabel, SLabel, Begin, TheCU); + if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc)) + DebugLoc.push_back(std::move(Loc)); } - DotDebugLocEntries.push_back(DotDebugLocEntry()); } // Collect info for variables that were optimized out. @@ -1465,6 +1345,8 @@ MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { // Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { + assert(CurMI == 0); + CurMI = MI; // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); @@ -1487,8 +1369,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } // Insert labels where requested. - DenseMap<const MachineInstr*, MCSymbol*>::iterator I = - LabelsBeforeInsn.find(MI); + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsBeforeInsn.find(MI); // No label needed. if (I == LabelsBeforeInsn.end()) @@ -1506,14 +1388,16 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } // Process end of an instruction. -void DwarfDebug::endInstruction(const MachineInstr *MI) { +void DwarfDebug::endInstruction() { + assert(CurMI != 0); // Don't create a new label after DBG_VALUE instructions. // They don't generate code. - if (!MI->isDebugValue()) + if (!CurMI->isDebugValue()) PrevLabel = 0; - DenseMap<const MachineInstr*, MCSymbol*>::iterator I = - LabelsAfterInsn.find(MI); + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsAfterInsn.find(CurMI); + CurMI = 0; // No label needed. if (I == LabelsAfterInsn.end()) @@ -1543,53 +1427,24 @@ void DwarfDebug::identifyScopeMarkers() { const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); if (!Children.empty()) - for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), - SE = Children.end(); SI != SE; ++SI) - WorkList.push_back(*SI); + WorkList.append(Children.begin(), Children.end()); if (S->isAbstractScope()) continue; - const SmallVectorImpl<InsnRange> &Ranges = S->getRanges(); - if (Ranges.empty()) - continue; - for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - assert(RI->first && "InsnRange does not have first instruction!"); - assert(RI->second && "InsnRange does not have second instruction!"); - requestLabelBeforeInsn(RI->first); - requestLabelAfterInsn(RI->second); + for (const InsnRange &R : S->getRanges()) { + assert(R.first && "InsnRange does not have first instruction!"); + assert(R.second && "InsnRange does not have second instruction!"); + requestLabelBeforeInsn(R.first); + requestLabelAfterInsn(R.second); } } } -// Get MDNode for DebugLoc's scope. 
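The DebugLoc.back().Merge(Loc) call above is what keeps fully coalesced DBG_VALUE ranges from producing back-to-back location-list entries with the same payload. The sketch below assumes the usual merge condition, namely an identical location value and a previous entry that ends exactly where the next one begins; DebugLocEntry::Merge itself is not shown in this hunk, so treat the predicate as an assumption:

// loclist_merge_demo.cpp - coalescing adjacent debug-loc entries.
#include <cstdint>
#include <cstdio>
#include <vector>

struct LocEntry {
  uint64_t Begin, End; // half-open address range the value is live over
  int Value;           // stand-in for the DWARF location description

  // Fold Next into this entry when both describe the same value and the
  // ranges touch; returns false when the caller must start a new entry.
  bool merge(const LocEntry &Next) {
    if (Value != Next.Value || End != Next.Begin)
      return false;
    End = Next.End;
    return true;
  }
};

int main() {
  std::vector<LocEntry> List;
  const LocEntry Incoming[] = {
      {0x10, 0x20, 7}, {0x20, 0x30, 7}, // same value, adjacent: one entry
      {0x30, 0x40, 9}};                 // different value: new entry

  for (const LocEntry &E : Incoming)
    if (List.empty() || !List.back().merge(E))
      List.push_back(E);

  for (const LocEntry &E : List)
    std::printf("[%#llx, %#llx) value=%d\n", (unsigned long long)E.Begin,
                (unsigned long long)E.End, E.Value);
  return 0; // prints two entries instead of three
}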
-static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { - if (MDNode *InlinedAt = DL.getInlinedAt(Ctx)) - return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx); - return DL.getScope(Ctx); -} - -// Walk up the scope chain of given debug loc and find line number info -// for the function. -static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { - const MDNode *Scope = getScopeNode(DL, Ctx); - DISubprogram SP = getDISubprogram(Scope); - if (SP.isSubprogram()) { - // Check for number of operands since the compatibility is - // cheap here. - if (SP->getNumOperands() > 19) - return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); - else - return DebugLoc::get(SP.getLineNumber(), 0, SP); - } - - return DebugLoc(); -} - // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { + CurFn = MF; // If there's no debug info for the function we're not going to do anything. if (!MMI->hasDebugInfo()) @@ -1606,14 +1461,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Make sure that each lexical scope will have a begin/end label. identifyScopeMarkers(); - // Set DwarfCompileUnitID in MCContext to the Compile Unit this function + // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function // belongs to so that we add to the correct per-cu line table in the // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) - // Use a single line table if we are using .loc and generating assembly. + if (Asm->OutStreamer.hasRawTextSupport()) + // Use a single line table if we are generating assembly. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); @@ -1670,7 +1525,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Terminate old register assignments that don't reach MI; MachineFunction::const_iterator PrevMBB = Prev->getParent(); - if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) && + if (PrevMBB != I && (!AtBlockEntry || std::next(PrevMBB) != I) && isDbgValueInDefinedReg(Prev)) { // Previous register assignment needs to terminate at the end of // its basic block. @@ -1681,7 +1536,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" << "\t" << *Prev << "\n"); History.pop_back(); - } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) + } else if (std::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. History.push_back(LastMI); } @@ -1690,7 +1545,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { History.push_back(MI); } else { // Not a DBG_VALUE instruction. - if (!MI->isLabel()) + if (!MI->isPosition()) AtBlockEntry = false; // First known non-DBG_VALUE and non-frame setup location marks @@ -1700,12 +1555,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrologEndLoc = MI->getDebugLoc(); // Check if the instruction clobbers any registers with debug vars. 
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; - for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid(); + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; const MDNode *Var = LiveUserVar[Reg]; @@ -1738,9 +1591,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } } - for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); - I != E; ++I) { - SmallVectorImpl<const MachineInstr *> &History = I->second; + for (auto &I : DbgValues) { + SmallVectorImpl<const MachineInstr *> &History = I.second; if (History.empty()) continue; @@ -1759,8 +1611,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } } // Request labels for the full history. - for (unsigned i = 0, e = History.size(); i != e; ++i) { - const MachineInstr *MI = History[i]; + for (const MachineInstr *MI : History) { if (MI->isDebugValue()) requestLabelBeforeInsn(MI); else @@ -1774,7 +1625,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. if (!PrologEndLoc.isUnknown()) { DebugLoc FnStartDL = - getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext()); + PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); recordSourceLine( FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), @@ -1817,27 +1668,42 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // Gather and emit post-function debug information. void DwarfDebug::endFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo() || LScopes.empty()) return; + // Every beginFunction(MF) call should be followed by an endFunction(MF) call, + // though the beginFunction may not be called at all. + // We should handle both cases. + if (CurFn == 0) + CurFn = MF; + else + assert(CurFn == MF); + assert(CurFn != 0); + + if (!MMI->hasDebugInfo() || LScopes.empty()) { + // If we don't have a lexical scope for this function then there will + // be a hole in the range information. Keep note of this by setting the + // previously used section to nullptr. + PrevSection = nullptr; + PrevCU = nullptr; + CurFn = 0; + return; + } // Define end label for subprogram. - FunctionEndSym = Asm->GetTempSymbol("func_end", - Asm->getFunctionNumber()); + FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); - // Set DwarfCompileUnitID in MCContext to default value. + + // Set DwarfDwarfCompileUnitID in MCContext to default value. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); SmallPtrSet<const MDNode *, 16> ProcessedVars; - collectVariableInfo(MF, ProcessedVars); + collectVariableInfo(ProcessedVars); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); // Construct abstract scopes. 
- ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList(); - for (unsigned i = 0, e = AList.size(); i != e; ++i) { - LexicalScope *AScope = AList[i]; + for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { DISubprogram SP(AScope->getScopeNode()); if (SP.isSubprogram()) { // Collect info for variables that were optimized out. @@ -1861,14 +1727,18 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { } DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); - - if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) + if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn)) TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); + // Add the range of this function to the list of ranges for the CU. + RangeSpan Span(FunctionBeginSym, FunctionEndSym); + TheCU->addRange(std::move(Span)); + PrevSection = Asm->getCurrentSection(); + PrevCU = TheCU; + // Clear debug info - for (ScopeVariablesMap::iterator - I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) - DeleteContainerPointers(I->second); + for (auto &I : ScopeVariables) + DeleteContainerPointers(I.second); ScopeVariables.clear(); DeleteContainerPointers(CurrentFnArguments); UserVariables.clear(); @@ -1877,6 +1747,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); PrevLabel = NULL; + CurFn = 0; } // Register a source line with debug info. Returns the unique label that was @@ -1886,6 +1757,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Fn; StringRef Dir; unsigned Src = 1; + unsigned Discriminator = 0; if (S) { DIDescriptor Scope(S); @@ -1909,13 +1781,16 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, DILexicalBlock DB(S); Fn = DB.getFilename(); Dir = DB.getDirectory(); + Discriminator = DB.getDiscriminator(); } else llvm_unreachable("Unexpected scope info"); - Src = getOrCreateSourceID(Fn, Dir, - Asm->OutStreamer.getContext().getDwarfCompileUnitID()); + unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); + Src = static_cast<DwarfCompileUnit *>(InfoHolder.getUnits()[CUID]) + ->getOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, + Discriminator, Fn); } //===----------------------------------------------------------------------===// @@ -1924,39 +1799,36 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Compute the size and offset of a DIE. The offset is relative to start of the // CU. It returns the offset after laying out the DIE. -unsigned -DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { - // Get the children. - const std::vector<DIE *> &Children = Die->getChildren(); - +unsigned DwarfFile::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Record the abbreviation. assignAbbrevNumber(Die->getAbbrev()); // Get the abbreviation for this DIE. - unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; + const DIEAbbrev &Abbrev = Die->getAbbrev(); // Set DIE offset Die->setOffset(Offset); // Start the size with the size of abbreviation code. 
- Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); + Offset += getULEB128Size(Die->getAbbrevNumber()); - const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); + const SmallVectorImpl<DIEValue *> &Values = Die->getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); // Size the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) // Size attribute value. Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm()); + // Get the children. + const std::vector<DIE *> &Children = Die->getChildren(); + // Size the DIE children if any. if (!Children.empty()) { - assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes && - "Children flag not set"); + assert(Abbrev.hasChildren() && "Children flag not set"); - for (unsigned j = 0, M = Children.size(); j < M; ++j) - Offset = computeSizeAndOffset(Children[j], Offset); + for (DIE *Child : Children) + Offset = computeSizeAndOffset(Child, Offset); // End of children marker. Offset += sizeof(int8_t); @@ -1967,23 +1839,22 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { } // Compute the size and offset for each DIE. -void DwarfUnits::computeSizeAndOffsets() { +void DwarfFile::computeSizeAndOffsets() { // Offset from the first CU in the debug info section is 0 initially. unsigned SecOffset = 0; // Iterate over each compile unit and set the size and offsets for each // DIE within each compile unit. All offsets are CU relative. - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - (*I)->setDebugInfoOffset(SecOffset); + for (DwarfUnit *TheU : CUs) { + TheU->setDebugInfoOffset(SecOffset); // CU-relative offset is reset to 0 here. - unsigned Offset = sizeof(int32_t) + // Length of Unit Info - (*I)->getHeaderSize(); // Unit-specific headers + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers // EndOffset here is CU-relative, after laying out // all of the CU DIE. - unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); + unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset); SecOffset += EndOffset; } } @@ -1994,21 +1865,20 @@ void DwarfDebug::emitSectionLabels() { // Dwarf sections base addresses. 
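computeSizeAndOffset above is a plain pre-order walk: a DIE costs the ULEB128-encoded abbreviation code, plus the size of each attribute value, plus its children, plus one trailing zero byte when children are present; computeSizeAndOffsets then seeds every unit with the 4-byte length field and the unit header. A toy, self-contained version of the same walk (the DIE shapes and the 7-byte header below are made up for the demo):

// die_size_demo.cpp - sizing a DIE tree the way DwarfFile::computeSizeAndOffset does.
#include <cstdint>
#include <cstdio>
#include <vector>

// Number of bytes an unsigned LEB128 encoding of Value occupies.
static unsigned getULEB128Size(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;
    ++Size;
  } while (Value);
  return Size;
}

struct ToyDIE {
  unsigned AbbrevNumber = 0;
  std::vector<unsigned> AttrSizes; // pre-computed SizeOf() of each attribute
  std::vector<ToyDIE> Children;
  unsigned Offset = 0, Size = 0;
};

// Lay out Die at Offset (unit-relative); returns the offset past the DIE.
static unsigned computeSizeAndOffset(ToyDIE &Die, unsigned Offset) {
  Die.Offset = Offset;
  Offset += getULEB128Size(Die.AbbrevNumber); // abbreviation code
  for (unsigned S : Die.AttrSizes)            // attribute values
    Offset += S;
  if (!Die.Children.empty()) {
    for (ToyDIE &Child : Die.Children)
      Offset = computeSizeAndOffset(Child, Offset);
    Offset += 1; // end-of-children marker, a single zero byte
  }
  Die.Size = Offset - Die.Offset;
  return Offset;
}

int main() {
  ToyDIE CU, Sub;
  CU.AbbrevNumber = 1;
  CU.AttrSizes = {4, 8}; // e.g. a 4-byte section offset and an 8-byte address
  Sub.AbbrevNumber = 2;
  Sub.AttrSizes = {4};
  CU.Children.push_back(Sub);

  // Unit-relative offsets start after the 4-byte length plus the unit header,
  // mirroring computeSizeAndOffsets(); 7 bytes is only an illustrative value.
  unsigned End = computeSizeAndOffset(CU, 4 + 7);
  std::printf("CU DIE at %u, size %u, unit ends at %u\n", CU.Offset, CU.Size, End);
  return 0;
}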
DwarfInfoSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); + emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); + if (useSplitDwarf()) + DwarfInfoDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); DwarfAbbrevSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); + emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); if (useSplitDwarf()) - DwarfAbbrevDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(), - "section_abbrev_dwo"); - emitSectionSym(Asm, TLOF.getDwarfARangesSection()); - - if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) - emitSectionSym(Asm, MacroInfo); + DwarfAbbrevDWOSectionSym = emitSectionSym( + Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo"); + if (GenerateARangeSection) + emitSectionSym(Asm, TLOF.getDwarfARangesSection()); DwarfLineSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - emitSectionSym(Asm, TLOF.getDwarfLocSection()); + emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); if (GenerateGnuPubSections) { DwarfGnuPubNamesSectionSym = emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); @@ -2020,39 +1890,36 @@ void DwarfDebug::emitSectionLabels() { } DwarfStrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); + emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); if (useSplitDwarf()) { DwarfStrDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); + emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); DwarfAddrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); - } - DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(), - "debug_range"); - - DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(), - "section_debug_loc"); - - TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); - emitSectionSym(Asm, TLOF.getDataSection()); + emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); + DwarfDebugLocSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc"); + } else + DwarfDebugLocSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); + DwarfDebugRangeSectionSym = + emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { +void DwarfDebug::emitDIE(DIE *Die) { // Get the abbreviation for this DIE. - unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1]; + const DIEAbbrev &Abbrev = Die->getAbbrev(); // Emit the code (index) for the abbreviation. 
if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" + - Twine::utohexstr(Die->getOffset()) + ":0x" + - Twine::utohexstr(Die->getSize()) + " " + - dwarf::TagString(Abbrev->getTag())); - Asm->EmitULEB128(AbbrevNumber); + Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + + "] 0x" + Twine::utohexstr(Die->getOffset()) + + ":0x" + Twine::utohexstr(Die->getSize()) + " " + + dwarf::TagString(Abbrev.getTag())); + Asm->EmitULEB128(Abbrev.getNumber()); - const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); + const SmallVectorImpl<DIEValue *> &Values = Die->getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { @@ -2060,159 +1927,73 @@ void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) { dwarf::Form Form = AbbrevData[i].getForm(); assert(Form && "Too many attributes for DIE (check abbreviation)"); - if (Asm->isVerbose()) + if (Asm->isVerbose()) { Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - - switch (Attr) { - case dwarf::DW_AT_abstract_origin: - case dwarf::DW_AT_type: - case dwarf::DW_AT_friend: - case dwarf::DW_AT_specification: - case dwarf::DW_AT_import: - case dwarf::DW_AT_containing_type: { - DIEEntry *E = cast<DIEEntry>(Values[i]); - DIE *Origin = E->getEntry(); - unsigned Addr = Origin->getOffset(); - if (Form == dwarf::DW_FORM_ref_addr) { - assert(!useSplitDwarf() && "TODO: dwo files can't have relocations."); - // For DW_FORM_ref_addr, output the offset from beginning of debug info - // section. Origin->getOffset() returns the offset from start of the - // compile unit. - CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit()); - assert(CU && "CUDie should belong to a CU."); - Addr += CU->getDebugInfoOffset(); - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr, - DIEEntry::getRefAddrSize(Asm)); - else - Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr, - DwarfInfoSectionSym, - DIEEntry::getRefAddrSize(Asm)); - } else { - // Make sure Origin belong to the same CU. - assert(Die->getCompileUnit() == Origin->getCompileUnit() && - "The referenced DIE should belong to the same CU in ref4"); - Asm->EmitInt32(Addr); - } - break; - } - case dwarf::DW_AT_ranges: { - // DW_AT_range Value encodes offset in debug_range section. - DIEInteger *V = cast<DIEInteger>(Values[i]); - - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) { - Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, - V->getValue(), - 4); - } else { - Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, - V->getValue(), - DwarfDebugRangeSectionSym, - 4); - } - break; - } - case dwarf::DW_AT_location: { - if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym); - else - Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); - } else { - Values[i]->EmitValue(Asm, Form); - } - break; - } - case dwarf::DW_AT_accessibility: { - if (Asm->isVerbose()) { - DIEInteger *V = cast<DIEInteger>(Values[i]); - Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue())); - } - Values[i]->EmitValue(Asm, Form); - break; - } - default: - // Emit an attribute using the defined form. 
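The special case being deleted above handled the difference between the two DIE reference forms: DW_FORM_ref4 stores a CU-relative offset as-is, while DW_FORM_ref_addr must be rebased to the start of .debug_info, which is what the removed Addr += CU->getDebugInfoOffset() did. A two-line illustration with made-up offsets:

// die_ref_demo.cpp - CU-relative (ref4) vs. section-relative (ref_addr) DIE references.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t CUDebugInfoOffset = 0x200; // where this CU starts inside .debug_info
  uint32_t DIEOffsetInCU = 0x4c;      // Origin->getOffset(), relative to the CU

  // DW_FORM_ref4 keeps the CU-relative value; DW_FORM_ref_addr rebases it.
  std::printf("ref4: 0x%x  ref_addr: 0x%x\n", (unsigned)DIEOffsetInCU,
              (unsigned)(CUDebugInfoOffset + DIEOffsetInCU));
  return 0;
}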
- Values[i]->EmitValue(Asm, Form); - break; + if (Attr == dwarf::DW_AT_accessibility) + Asm->OutStreamer.AddComment(dwarf::AccessibilityString( + cast<DIEInteger>(Values[i])->getValue())); } + + // Emit an attribute using the defined form. + Values[i]->EmitValue(Asm, Form); } // Emit the DIE children if any. - if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) { + if (Abbrev.hasChildren()) { const std::vector<DIE *> &Children = Die->getChildren(); - for (unsigned j = 0, M = Children.size(); j < M; ++j) - emitDIE(Children[j], Abbrevs); + for (DIE *Child : Children) + emitDIE(Child); - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("End Of Children Mark"); + Asm->OutStreamer.AddComment("End Of Children Mark"); Asm->EmitInt8(0); } } // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfUnits::emitUnits(DwarfDebug *DD, - const MCSection *USection, - const MCSection *ASection, - const MCSymbol *ASectionSym) { - Asm->OutStreamer.SwitchSection(USection); - for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - CompileUnit *TheCU = *I; - DIE *Die = TheCU->getCUDie(); +void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) { + for (DwarfUnit *TheU : CUs) { + DIE *Die = TheU->getUnitDie(); + const MCSection *USection = TheU->getSection(); + Asm->OutStreamer.SwitchSection(USection); // Emit the compile units header. - Asm->OutStreamer - .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(), - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel(TheU->getLabelBegin()); // Emit size of content not including length itself Asm->OutStreamer.AddComment("Length of Unit"); - Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize()); + Asm->EmitInt32(TheU->getHeaderSize() + Die->getSize()); - TheCU->emitHeader(ASection, ASectionSym); + TheU->emitHeader(ASectionSym); - DD->emitDIE(Die, Abbreviations); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), - TheCU->getUniqueID())); + DD->emitDIE(Die); + Asm->OutStreamer.EmitLabel(TheU->getLabelEnd()); } } // Emit the debug info section. void DwarfDebug::emitDebugInfo() { - DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(), - Asm->getObjFileLowering().getDwarfAbbrevSection(), - DwarfAbbrevSectionSym); + Holder.emitUnits(this, DwarfAbbrevSectionSym); } // Emit the abbreviation section. void DwarfDebug::emitAbbreviations() { - if (!useSplitDwarf()) - emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(), - &Abbreviations); - else - emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + + Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -void DwarfDebug::emitAbbrevs(const MCSection *Section, - std::vector<DIEAbbrev *> *Abbrevs) { +void DwarfFile::emitAbbrevs(const MCSection *Section) { // Check to see if it is worth the effort. - if (!Abbrevs->empty()) { + if (!Abbreviations.empty()) { // Start the debug abbrev section. Asm->OutStreamer.SwitchSection(Section); - MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName()); - Asm->OutStreamer.EmitLabel(Begin); - // For each abbrevation. 
- for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) { - // Get abbreviation data - const DIEAbbrev *Abbrev = Abbrevs->at(i); - + for (const DIEAbbrev *Abbrev : Abbreviations) { // Emit the abbrevations code (base 1 index.) Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); @@ -2222,9 +2003,6 @@ void DwarfDebug::emitAbbrevs(const MCSection *Section, // Mark end of abbreviations. Asm->EmitULEB128(0, "EOM(3)"); - - MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName()); - Asm->OutStreamer.EmitLabel(End); } } @@ -2241,8 +2019,9 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->OutStreamer.AddComment("Section end label"); - Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getDataLayout().getPointerSize()); + Asm->OutStreamer.EmitSymbolValue( + Asm->GetTempSymbol("section_end", SectionEnd), + Asm->getDataLayout().getPointerSize()); // Mark end of matrix. Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); @@ -2253,25 +2032,19 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames(); - for (StringMap<std::vector<DIE*> >::const_iterator - GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - StringRef Name = GI->getKey(); - const std::vector<DIE *> &Entities = GI->second; - for (std::vector<DIE *>::const_iterator DI = Entities.begin(), - DE = Entities.end(); DI != DE; ++DI) - AT.AddName(Name, (*DI)); + DwarfAccelTable AT( + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); + for (DwarfUnit *TheU : getUnits()) { + for (const auto &GI : TheU->getAccelNames()) { + StringRef Name = GI.getKey(); + for (const DIE *D : GI.second) + AT.AddName(Name, D); } } AT.FinalizeTable(Asm, "Names"); Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelNamesSection()); + Asm->getObjFileLowering().getDwarfAccelNamesSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); @@ -2282,25 +2055,19 @@ void DwarfDebug::emitAccelNames() { // Emit objective C classes and categories into a hashed accelerator table // section. 
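For reference, the abbreviation declarations emitAbbrevs writes, and the ULEB128 zero labeled "EOM(3)" that closes them, follow the fixed .debug_abbrev shape: code, tag, children byte, (attribute, form) pairs ending in (0, 0), and a lone zero code terminating the whole table. A standalone serializer of that layout (the helper names and sample constants are local to this sketch, not LLVM's):

// abbrev_layout_demo.cpp - the .debug_abbrev wire format behind emitAbbrevs.
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (V);
}

struct AbbrevDecl {
  uint64_t Code, Tag;
  bool HasChildren;
  std::vector<std::pair<uint64_t, uint64_t>> AttrForm; // (DW_AT_*, DW_FORM_*)
};

static void emitAbbrevTable(std::vector<uint8_t> &Out,
                            const std::vector<AbbrevDecl> &Abbrevs) {
  for (const AbbrevDecl &A : Abbrevs) {
    emitULEB128(Out, A.Code);             // abbreviation code (base-1 index)
    emitULEB128(Out, A.Tag);              // DW_TAG_*
    Out.push_back(A.HasChildren ? 1 : 0); // DW_CHILDREN_yes / DW_CHILDREN_no
    for (const auto &AF : A.AttrForm) {
      emitULEB128(Out, AF.first);
      emitULEB128(Out, AF.second);
    }
    emitULEB128(Out, 0); // attribute list terminator
    emitULEB128(Out, 0);
  }
  emitULEB128(Out, 0); // table terminator, the "EOM(3)" in the hunk above
}

int main() {
  std::vector<uint8_t> Out;
  // 0x11 = DW_TAG_compile_unit, 0x03 = DW_AT_name, 0x08 = DW_FORM_string.
  emitAbbrevTable(Out, {{1, 0x11, true, {{0x03, 0x08}}}});
  std::printf("%zu bytes of .debug_abbrev\n", Out.size());
  return 0;
}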
void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC(); - for (StringMap<std::vector<DIE*> >::const_iterator - GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - StringRef Name = GI->getKey(); - const std::vector<DIE *> &Entities = GI->second; - for (std::vector<DIE *>::const_iterator DI = Entities.begin(), - DE = Entities.end(); DI != DE; ++DI) - AT.AddName(Name, (*DI)); + DwarfAccelTable AT( + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); + for (DwarfUnit *TheU : getUnits()) { + for (const auto &GI : TheU->getAccelObjC()) { + StringRef Name = GI.getKey(); + for (const DIE *D : GI.second) + AT.AddName(Name, D); } } AT.FinalizeTable(Asm, "ObjC"); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() - .getDwarfAccelObjCSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelObjCSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); @@ -2310,25 +2077,19 @@ void DwarfDebug::emitAccelObjC() { // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace(); - for (StringMap<std::vector<DIE*> >::const_iterator - GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - StringRef Name = GI->getKey(); - const std::vector<DIE *> &Entities = GI->second; - for (std::vector<DIE *>::const_iterator DI = Entities.begin(), - DE = Entities.end(); DI != DE; ++DI) - AT.AddName(Name, (*DI)); + DwarfAccelTable AT( + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); + for (DwarfUnit *TheU : getUnits()) { + for (const auto &GI : TheU->getAccelNamespace()) { + StringRef Name = GI.getKey(); + for (const DIE *D : GI.second) + AT.AddName(Name, D); } } AT.FinalizeTable(Asm, "namespac"); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() - .getDwarfAccelNamespaceSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelNamespaceSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); @@ -2339,31 +2100,24 @@ void DwarfDebug::emitAccelNamespaces() { // Emit type dies into a hashed accelerator table. 
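Each AT.AddName call above feeds a name/DIE pair into one of the Apple-style accelerator tables, which bucket names by a string hash; the table header identifies it as the djb (Bernstein) hash, and that is what this sketch assumes. Bucket assignment is just hash modulo bucket count; the real table additionally records atoms, hash values and DIE-offset data per bucket, which is omitted here:

// accel_hash_demo.cpp - djb2 bucketing as assumed for the accelerator tables.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Bernstein hash: start at 5381, then hash = hash * 33 + byte.
static uint32_t hashDJB(const std::string &S) {
  uint32_t H = 5381;
  for (unsigned char C : S)
    H = H * 33 + C;
  return H;
}

int main() {
  const std::vector<std::string> Names = {"main", "printf", "std", "Vector"};
  const uint32_t BucketCount = 4; // the real code derives this from the entry count

  for (const std::string &N : Names) {
    uint32_t H = hashDJB(N);
    std::printf("%-8s hash=0x%08x bucket=%u\n", N.c_str(), (unsigned)H,
                (unsigned)(H % BucketCount));
  }
  return 0;
}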
void DwarfDebug::emitAccelTypes() { std::vector<DwarfAccelTable::Atom> Atoms; - Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, - dwarf::DW_FORM_data4)); - Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, - dwarf::DW_FORM_data2)); - Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, - dwarf::DW_FORM_data1)); + Atoms.push_back( + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); + Atoms.push_back( + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2)); + Atoms.push_back( + DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)); DwarfAccelTable AT(Atoms); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names - = TheCU->getAccelTypes(); - for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator - GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - StringRef Name = GI->getKey(); - const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second; - for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI - = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) - AT.AddName(Name, (*DI).first, (*DI).second); + for (DwarfUnit *TheU : getUnits()) { + for (const auto &GI : TheU->getAccelTypes()) { + StringRef Name = GI.getKey(); + for (const auto &DI : GI.second) + AT.AddName(Name, DI.first, DI.second); } } AT.FinalizeTable(Asm, "types"); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() - .getDwarfAccelTypesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelTypesSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); @@ -2386,8 +2140,8 @@ void DwarfDebug::emitAccelTypes() { // reference in the pubname header doesn't change. /// computeIndexValue - Compute the gdb index value for the DIE and CU. -static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU, - DIE *Die) { +static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, + const DIE *Die) { dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC; // We could have a specification DIE that has our most of our knowledge, @@ -2431,176 +2185,109 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU, /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames(bool GnuStyle) { - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); const MCSection *PSec = GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); - typedef DenseMap<const MDNode*, CompileUnit*> CUMapType; - for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - unsigned ID = TheCU->getUniqueID(); + emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames); +} + +void DwarfDebug::emitDebugPubSection( + bool GnuStyle, const MCSection *PSec, StringRef Name, + const StringMap<const DIE *> &(DwarfUnit::*Accessor)() const) { + for (const auto &NU : CUMap) { + DwarfCompileUnit *TheU = NU.second; + + const auto &Globals = (TheU->*Accessor)(); + + if (Globals.empty()) + continue; + + if (auto Skeleton = static_cast<DwarfCompileUnit *>(TheU->getSkeleton())) + TheU = Skeleton; + unsigned ID = TheU->getUniqueID(); // Start the dwarf pubnames section. 
Asm->OutStreamer.SwitchSection(PSec); - // Emit a label so we can reference the beginning of this pubname section. - if (GnuStyle) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames", - TheCU->getUniqueID())); - // Emit the header. - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), - Asm->GetTempSymbol("pubnames_begin", ID), 4); + Asm->OutStreamer.AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID); + MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); + Asm->OutStreamer.EmitLabel(BeginLabel); Asm->OutStreamer.AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), - DwarfInfoSectionSym); + Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID), - Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), - 4); + Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4); // Emit the pubnames for this compilation unit. - const StringMap<DIE*> &Globals = TheCU->getGlobalNames(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; + for (const auto &GI : Globals) { + const char *Name = GI.getKeyData(); + const DIE *Entity = GI.second; Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); if (GnuStyle) { - dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); Asm->OutStreamer.AddComment( Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); Asm->EmitInt8(Desc.toBits()); } - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID)); + Asm->OutStreamer.EmitLabel(EndLabel); } } void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); const MCSection *PSec = GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection(); - for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), - E = CUMap.end(); - I != E; ++I) { - CompileUnit *TheCU = I->second; - // Start the dwarf pubtypes section. - Asm->OutStreamer.SwitchSection(PSec); - - // Emit a label so we can reference the beginning of this pubtype section. - if (GnuStyle) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", - TheCU->getUniqueID())); - - // Emit the header. 
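emitDebugPubSection above writes the standard pubnames/pubtypes shape: a length field that excludes itself, a 2-byte version, the owning CU's offset and length within .debug_info, then (DIE offset, name) entries, with one extra descriptor byte per entry in the GNU variant, closed by a zero offset. A small little-endian DWARF32 serializer for the plain form, using made-up offsets:

// pubnames_demo.cpp - byte layout of a DWARF32 .debug_pubnames set.
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static void emitU16(std::vector<uint8_t> &Out, uint16_t V) {
  Out.push_back(V & 0xff);
  Out.push_back(V >> 8);
}
static void emitU32(std::vector<uint8_t> &Out, uint32_t V) {
  for (int I = 0; I < 4; ++I)
    Out.push_back((V >> (8 * I)) & 0xff);
}

static std::vector<uint8_t>
emitPubNames(uint32_t CUOffset, uint32_t CULength,
             const std::vector<std::pair<uint32_t, std::string>> &Entries) {
  std::vector<uint8_t> Body;
  emitU16(Body, 2);        // DW_PUBNAMES_VERSION
  emitU32(Body, CUOffset); // offset of the CU in .debug_info
  emitU32(Body, CULength); // length of that CU's contribution
  for (const auto &E : Entries) {
    emitU32(Body, E.first); // CU-relative DIE offset
    Body.insert(Body.end(), E.second.begin(), E.second.end());
    Body.push_back('\0');   // names are NUL-terminated
  }
  emitU32(Body, 0); // end mark

  std::vector<uint8_t> Out;
  emitU32(Out, (uint32_t)Body.size()); // unit_length excludes itself
  Out.insert(Out.end(), Body.begin(), Body.end());
  return Out;
}

int main() {
  std::vector<uint8_t> Section =
      emitPubNames(/*CUOffset=*/0, /*CULength=*/0x95,
                   {{0x26, "main"}, {0x51, "global_var"}});
  std::printf(".debug_pubnames set is %zu bytes\n", Section.size());
  return 0;
}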
- Asm->OutStreamer.AddComment("Length of Public Types Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); - - Asm->OutStreamer.EmitLabel( - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID())); - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset( - Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()), - Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4); - - // Emit the pubtypes. - const StringMap<DIE *> &Globals = TheCU->getGlobalTypes(); - for (StringMap<DIE *>::const_iterator GI = Globals.begin(), - GE = Globals.end(); - GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); - - if (GnuStyle) { - dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); - Asm->OutStreamer.AddComment( - Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + - dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); - Asm->EmitInt8(Desc.toBits()); - } - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - - // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1)); - } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID())); - } + emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes); } // Emit strings into a string section. -void DwarfUnits::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection = NULL, - const MCSymbol *StrSecSym = NULL) { +void DwarfFile::emitStrings(const MCSection *StrSection, + const MCSection *OffsetSection = NULL, + const MCSymbol *StrSecSym = NULL) { - if (StringPool.empty()) return; + if (StringPool.empty()) + return; // Start the dwarf str section. Asm->OutStreamer.SwitchSection(StrSection); // Get all of the string pool entries and put them in an array by their ID so // we can sort them. - SmallVector<std::pair<unsigned, - StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries; + SmallVector<std::pair<unsigned, const StrPool::value_type *>, 64 > Entries; - for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator - I = StringPool.begin(), E = StringPool.end(); - I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &*I)); + for (const auto &I : StringPool) + Entries.push_back(std::make_pair(I.second.second, &I)); array_pod_sort(Entries.begin(), Entries.end()); - for (unsigned i = 0, e = Entries.size(); i != e; ++i) { + for (const auto &Entry : Entries) { // Emit a label for reference from debug information entries. - Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); + Asm->OutStreamer.EmitLabel(Entry.second->getValue().first); // Emit the string itself with a terminating null byte. 
- Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), - Entries[i].second->getKeyLength()+1)); + Asm->OutStreamer.EmitBytes(StringRef(Entry.second->getKeyData(), + Entry.second->getKeyLength() + 1)); } // If we've got an offset section go ahead and emit that now as well. @@ -2608,17 +2295,18 @@ void DwarfUnits::emitStrings(const MCSection *StrSection, Asm->OutStreamer.SwitchSection(OffsetSection); unsigned offset = 0; unsigned size = 4; // FIXME: DWARF64 is 8. - for (unsigned i = 0, e = Entries.size(); i != e; ++i) { + for (const auto &Entry : Entries) { Asm->OutStreamer.EmitIntValue(offset, size); - offset += Entries[i].second->getKeyLength() + 1; + offset += Entry.second->getKeyLength() + 1; } } } -// Emit strings into a string section. -void DwarfUnits::emitAddresses(const MCSection *AddrSection) { +// Emit addresses into the section given. +void DwarfFile::emitAddresses(const MCSection *AddrSection) { - if (AddressPool.empty()) return; + if (AddressPool.empty()) + return; // Start the dwarf addr section. Asm->OutStreamer.SwitchSection(AddrSection); @@ -2626,147 +2314,143 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) { // Order the address pool entries by ID SmallVector<const MCExpr *, 64> Entries(AddressPool.size()); - for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(), - E = AddressPool.end(); - I != E; ++I) - Entries[I->second] = I->first; - - for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit an expression for reference from debug information entries. - if (const MCExpr *Expr = Entries[i]) - Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize()); - else - Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); - } + for (const auto &I : AddressPool) + Entries[I.second.Number] = + I.second.TLS + ? Asm->getObjFileLowering().getDebugThreadLocalSymbol(I.first) + : MCSymbolRefExpr::Create(I.first, Asm->OutContext); + for (const MCExpr *Entry : Entries) + Asm->OutStreamer.EmitValue(Entry, Asm->getDataLayout().getPointerSize()); } // Emit visible names into a debug str section. void DwarfDebug::emitDebugStr() { - DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit locations into the debug loc section. -void DwarfDebug::emitDebugLoc() { - if (DotDebugLocEntries.empty()) - return; - - for (SmallVectorImpl<DotDebugLocEntry>::iterator - I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); - I != E; ++I) { - DotDebugLocEntry &Entry = *I; - if (I + 1 != DotDebugLocEntries.end()) - Entry.Merge(I+1); - } - - // Start the dwarf loc section. 
- Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); - unsigned index = 1; - for (SmallVectorImpl<DotDebugLocEntry>::iterator - I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); - I != E; ++I, ++index) { - DotDebugLocEntry &Entry = *I; - if (Entry.isMerged()) continue; - if (Entry.isEmpty()) { - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); +void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocEntry &Entry) { + DIVariable DV(Entry.getVariable()); + if (Entry.isInt()) { + DIBasicType BTy(resolve(DV.getType())); + if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || + BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { + Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts"); + Streamer.EmitSLEB128(Entry.getInt()); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); - Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); - DIVariable DV(Entry.getVariable()); - Asm->OutStreamer.AddComment("Loc expr size"); - MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); - MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->EmitLabelDifference(end, begin, 2); - Asm->OutStreamer.EmitLabel(begin); - if (Entry.isInt()) { - DIBasicType BTy(DV.getType()); - if (BTy.Verify() && - (BTy.getEncoding() == dwarf::DW_ATE_signed - || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { - Asm->OutStreamer.AddComment("DW_OP_consts"); - Asm->EmitInt8(dwarf::DW_OP_consts); - Asm->EmitSLEB128(Entry.getInt()); + Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); + Streamer.EmitULEB128(Entry.getInt()); + } + } else if (Entry.isLocation()) { + MachineLocation Loc = Entry.getLoc(); + if (!DV.hasComplexAddress()) + // Regular entry. + Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + else { + // Complex address entry. + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + if (Loc.getOffset()) { + i = 2; + Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); + Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); + Streamer.EmitSLEB128(DV.getAddrElement(1)); } else { - Asm->OutStreamer.AddComment("DW_OP_constu"); - Asm->EmitInt8(dwarf::DW_OP_constu); - Asm->EmitULEB128(Entry.getInt()); + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect()); + i = 2; } - } else if (Entry.isLocation()) { - MachineLocation Loc = Entry.getLoc(); - if (!DV.hasComplexAddress()) - // Regular entry. - Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); - else { - // Complex address entry. 
- unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Loc.getOffset()) { - i = 2; - Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); - Asm->OutStreamer.AddComment("DW_OP_deref"); - Asm->EmitInt8(dwarf::DW_OP_deref); - Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); - Asm->EmitInt8(dwarf::DW_OP_plus_uconst); - Asm->EmitSLEB128(DV.getAddrElement(1)); - } else { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); - i = 2; - } - } else { - Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); - } + } else { + Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + } - // Emit remaining complex address elements. - for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { - Asm->EmitInt8(dwarf::DW_OP_plus_uconst); - Asm->EmitULEB128(DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - if (!Loc.isReg()) - Asm->EmitInt8(dwarf::DW_OP_deref); - } else - llvm_unreachable("unknown Opcode found in complex address"); - } - } + // Emit remaining complex address elements. + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); + Streamer.EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + if (!Loc.isReg()) + Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); + } else + llvm_unreachable("unknown Opcode found in complex address"); } - // else ... ignore constant fp. There is not any good way to - // to represent them here in dwarf. - Asm->OutStreamer.EmitLabel(end); } } + // else ... ignore constant fp. There is not any good way to + // to represent them here in dwarf. + // FIXME: ^ } -struct SymbolCUSorter { - SymbolCUSorter(const MCStreamer &s) : Streamer(s) {} - const MCStreamer &Streamer; +void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) { + Asm->OutStreamer.AddComment("Loc expr size"); + MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); + MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); + Asm->EmitLabelDifference(end, begin, 2); + Asm->OutStreamer.EmitLabel(begin); + // Emit the entry. + APByteStreamer Streamer(*Asm); + emitDebugLocEntry(Streamer, Entry); + // Close the range. + Asm->OutStreamer.EmitLabel(end); +} - bool operator() (const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0; +// Emit locations into the debug loc section. +void DwarfDebug::emitDebugLoc() { + // Start the dwarf loc section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + unsigned char Size = Asm->getDataLayout().getPointerSize(); + for (const auto &DebugLoc : DotDebugLocEntries) { + Asm->OutStreamer.EmitLabel(DebugLoc.Label); + for (const auto &Entry : DebugLoc.List) { + // Set up the range. This range is relative to the entry point of the + // compile unit. This is a hard coded 0 for low_pc when we're emitting + // ranges, or the DW_AT_low_pc on the compile unit otherwise. + const DwarfCompileUnit *CU = Entry.getCU(); + if (CU->getRanges().size() == 1) { + // Grab the begin symbol from the first range as our base. 
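The DW_OP_consts / DW_OP_constu split in emitDebugLocEntry further up, like the DW_OP_plus_uconst operands here, comes down to the two LEB128 flavours: signed constants get DW_OP_consts with an SLEB128 operand, unsigned ones DW_OP_constu with a ULEB128 operand. Self-contained encoders for both, with the opcode choice mirrored (the opcode values are the standard DWARF ones):

// leb128_demo.cpp - the operand encodings behind DW_OP_consts / DW_OP_constu.
#include <cstdint>
#include <cstdio>
#include <vector>

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

static void emitSLEB128(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7; // assumes arithmetic shift for negatives (true on mainstream compilers)
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

// Mirror of the choice emitDebugLocEntry makes for integer constants.
static std::vector<uint8_t> encodeConstant(int64_t Value, bool SignedType) {
  const uint8_t DW_OP_constu = 0x10, DW_OP_consts = 0x11;
  std::vector<uint8_t> Expr;
  if (SignedType) {
    Expr.push_back(DW_OP_consts);
    emitSLEB128(Expr, Value);
  } else {
    Expr.push_back(DW_OP_constu);
    emitULEB128(Expr, (uint64_t)Value);
  }
  return Expr;
}

int main() {
  for (uint8_t B : encodeConstant(-2, /*SignedType=*/true))
    std::printf("%02x ", (unsigned)B); // 11 7e
  std::printf("\n");
  for (uint8_t B : encodeConstant(130, /*SignedType=*/false))
    std::printf("%02x ", (unsigned)B); // 10 82 01
  std::printf("\n");
  return 0;
}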
+ const MCSymbol *Base = CU->getRanges()[0].getStart(); + Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size); + Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size); + } else { + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + } - // Symbols with no order assigned should be placed at the end. - // (e.g. section end labels) - if (IA == 0) - IA = (unsigned)(-1); - if (IB == 0) - IB = (unsigned)(-1); - return IA < IB; + emitDebugLocEntryLocation(Entry); + } + Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer.EmitIntValue(0, Size); } -}; +} -static bool CUSort(const CompileUnit *A, const CompileUnit *B) { - return (A->getUniqueID() < B->getUniqueID()); +void DwarfDebug::emitDebugLocDWO() { + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocDWOSection()); + for (const auto &DebugLoc : DotDebugLocEntries) { + Asm->OutStreamer.EmitLabel(DebugLoc.Label); + for (const auto &Entry : DebugLoc.List) { + // Just always use start_length for now - at least that's one address + // rather than two. We could get fancier and try to, say, reuse an + // address we know we've emitted elsewhere (the start of the function? + // The start of the CU or CU subrange that encloses this range?) + Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); + unsigned idx = InfoHolder.getAddrPoolIndex(Entry.getBeginSym()); + Asm->EmitULEB128(idx); + Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4); + + emitDebugLocEntryLocation(Entry); + } + Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry); + } } struct ArangeSpan { @@ -2777,18 +2461,17 @@ struct ArangeSpan { // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. - Asm->OutStreamer - .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); - typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType; + typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan> > SpansType; SpansType Spans; // Build a list of sections used. std::vector<const MCSection *> Sections; - for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); - it++) { - const MCSection *Section = it->first; + for (const auto &it : SectionMap) { + const MCSection *Section = it.first; Sections.push_back(Section); } @@ -2797,22 +2480,30 @@ void DwarfDebug::emitDebugARanges() { std::sort(Sections.begin(), Sections.end(), SectionSort); // Build a set of address spans, sorted by CU. - for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) { - const MCSection *Section = Sections[SecIdx]; + for (const MCSection *Section : Sections) { SmallVector<SymbolCU, 8> &List = SectionMap[Section]; if (List.size() < 2) continue; // Sort the symbols by offset within the section. - SymbolCUSorter sorter(Asm->OutStreamer); - std::sort(List.begin(), List.end(), sorter); + std::sort(List.begin(), List.end(), + [&](const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Asm->OutStreamer.GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer.GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + return false; + if (IB == 0) + return true; + return IA < IB; + }); // If we have no section (e.g. common), just write out // individual spans for each symbol. 
if (Section == NULL) { - for (size_t n = 0; n < List.size(); n++) { - const SymbolCU &Cur = List[n]; - + for (const SymbolCU &Cur : List) { ArangeSpan Span; Span.Start = Cur.Sym; Span.End = NULL; @@ -2822,7 +2513,7 @@ void DwarfDebug::emitDebugARanges() { } else { // Build spans between each label. const MCSymbol *StartSym = List[0].Sym; - for (size_t n = 1; n < List.size(); n++) { + for (size_t n = 1, e = List.size(); n < e; n++) { const SymbolCU &Prev = List[n - 1]; const SymbolCU &Cur = List[n]; @@ -2838,37 +2529,36 @@ void DwarfDebug::emitDebugARanges() { } } - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); unsigned PtrSize = Asm->getDataLayout().getPointerSize(); // Build a list of CUs used. - std::vector<CompileUnit *> CUs; - for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) { - CompileUnit *CU = it->first; + std::vector<DwarfCompileUnit *> CUs; + for (const auto &it : Spans) { + DwarfCompileUnit *CU = it.first; CUs.push_back(CU); } // Sort the CU list (again, to ensure consistent output order). - std::sort(CUs.begin(), CUs.end(), CUSort); + std::sort(CUs.begin(), CUs.end(), [](const DwarfUnit *A, const DwarfUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); // Emit an arange table for each CU we used. - for (size_t CUIdx=0;CUIdx<CUs.size();CUIdx++) { - CompileUnit *CU = CUs[CUIdx]; + for (DwarfCompileUnit *CU : CUs) { std::vector<ArangeSpan> &List = Spans[CU]; // Emit size of content not including length itself. - unsigned ContentSize - = sizeof(int16_t) // DWARF ARange version number - + sizeof(int32_t) // Offset of CU in the .debug_info section - + sizeof(int8_t) // Pointer Size (in bytes) - + sizeof(int8_t); // Segment Size (in bytes) + unsigned ContentSize = + sizeof(int16_t) + // DWARF ARange version number + sizeof(int32_t) + // Offset of CU in the .debug_info section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int8_t); // Segment Size (in bytes) unsigned TupleSize = PtrSize * 2; // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. - unsigned Padding = 0; - while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0) - Padding++; + unsigned Padding = + OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize); ContentSize += Padding; ContentSize += (List.size() + 1) * TupleSize; @@ -2879,19 +2569,15 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer.AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset( - Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()), - DwarfInfoSectionSym); + Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym()); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); Asm->OutStreamer.AddComment("Segment Size (in bytes)"); Asm->EmitInt8(0); - for (unsigned n = 0; n < Padding; n++) - Asm->EmitInt8(0xff); + Asm->OutStreamer.EmitFill(Padding, 0xff); - for (unsigned n = 0; n < List.size(); n++) { - const ArangeSpan &Span = List[n]; + for (const ArangeSpan &Span : List) { Asm->EmitLabelReference(Span.Start, PtrSize); // Calculate the size as being from the span start to it's end. @@ -2917,122 +2603,129 @@ void DwarfDebug::emitDebugARanges() { // Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. 
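
// The arange header padding above is now computed in closed form with
// OffsetToAlignment rather than by incrementing a counter until the header is
// tuple-aligned. A standalone sketch of the equivalent arithmetic (plain C++,
// not the LLVM helper itself):

#include <cstdint>

// Bytes needed to round Offset up to the next multiple of Align (Align > 0).
static unsigned offsetToAlignment(uint64_t Offset, uint64_t Align) {
  return static_cast<unsigned>((Align - (Offset % Align)) % Align);
}

// The removed loop computed the same value one byte at a time.
static unsigned offsetToAlignmentByLoop(uint64_t Offset, uint64_t Align) {
  unsigned Padding = 0;
  while ((Offset + Padding) % Align != 0)
    ++Padding;
  return Padding;
}

// For example, with the 4-byte unit length, the 8 bytes of header fields, and
// 16-byte address/length tuples: offsetToAlignment(4 + 8, 16) == 4.
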
- Asm->OutStreamer - .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); + + // Size for our labels. unsigned char Size = Asm->getDataLayout().getPointerSize(); - for (SmallVectorImpl<const MCSymbol *>::iterator - I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); - I != E; ++I) { - if (*I) - Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size); - else + + // Grab the specific ranges for the compile units in the module. + for (const auto &I : CUMap) { + DwarfCompileUnit *TheCU = I.second; + + // Emit a symbol so we can find the beginning of our ranges. + Asm->OutStreamer.EmitLabel(TheCU->getLabelRange()); + + // Iterate over the misc ranges for the compile units in the module. + for (const RangeSpanList &List : TheCU->getRangeLists()) { + // Emit our symbol so we can find the beginning of the range. + Asm->OutStreamer.EmitLabel(List.getSym()); + + for (const RangeSpan &Range : List.getRanges()) { + const MCSymbol *Begin = Range.getStart(); + const MCSymbol *End = Range.getEnd(); + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + Asm->OutStreamer.EmitSymbolValue(Begin, Size); + Asm->OutStreamer.EmitSymbolValue(End, Size); + } + + // And terminate the list with two 0 values. Asm->OutStreamer.EmitIntValue(0, Size); - } -} + Asm->OutStreamer.EmitIntValue(0, Size); + } -// Emit visible names into a debug macinfo section. -void DwarfDebug::emitDebugMacInfo() { - if (const MCSection *LineInfo = - Asm->getObjFileLowering().getDwarfMacroInfoSection()) { - // Start the dwarf macinfo section. - Asm->OutStreamer.SwitchSection(LineInfo); + // Now emit a range for the CU itself. + if (TheCU->getRanges().size() > 1) { + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID())); + for (const RangeSpan &Range : TheCU->getRanges()) { + const MCSymbol *Begin = Range.getStart(); + const MCSymbol *End = Range.getEnd(); + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + Asm->OutStreamer.EmitSymbolValue(Begin, Size); + Asm->OutStreamer.EmitSymbolValue(End, Size); + } + // And terminate the list with two 0 values. + Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer.EmitIntValue(0, Size); + } } } // DWARF5 Experimental Separate Dwarf emitters. -// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, -// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_ranges_base, DW_AT_addr_base. -CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) { +void DwarfDebug::initSkeletonUnit(const DwarfUnit *U, DIE *Die, + DwarfUnit *NewU) { + NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, + U->getCUNode().getSplitDebugFilename()); - DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(), - Asm, this, &SkeletonHolder); - - NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - CU->getNode().getSplitDebugFilename()); - - // Relocate to the beginning of the addr_base section, else 0 for the - // beginning of the one for this compile unit. 
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, - DwarfAddrSectionSym); - else - NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, - dwarf::DW_FORM_sec_offset, 0); + if (!CompilationDir.empty()) + NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0, or a NULL label for this. - NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + addGnuPubAttributes(NewU, Die); - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. - // FIXME: Should handle multiple compile units. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - DwarfLineSectionSym); - else - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); - - if (!CompilationDir.empty()) - NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + SkeletonHolder.addUnit(NewU); +} - // Flags to let the linker know we have emitted new style pubnames. - if (GenerateGnuPubSections) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); - else - NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), - DwarfGnuPubNamesSectionSym); +// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, +// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, +// DW_AT_addr_base, DW_AT_ranges_base. +DwarfCompileUnit *DwarfDebug::constructSkeletonCU(const DwarfCompileUnit *CU) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); - else - NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), - DwarfGnuPubTypesSectionSym); - } + DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); + DwarfCompileUnit *NewCU = new DwarfCompileUnit( + CU->getUniqueID(), Die, CU->getCUNode(), Asm, this, &SkeletonHolder); + NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(), + DwarfInfoSectionSym); - // Flag if we've emitted any ranges and their location for the compile unit. - if (DebugRangeSymbols.size()) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_ranges_base, - dwarf::DW_FORM_sec_offset, DwarfDebugRangeSectionSym); - else - NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4, - 0); - } + NewCU->initStmtList(DwarfLineSectionSym); - SkeletonHolder.addUnit(NewCU); - SkeletonCUs.push_back(NewCU); + initSkeletonUnit(CU, Die, NewCU); return NewCU; } -void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) { - assert(useSplitDwarf() && "No split dwarf debug info?"); - emitAbbrevs(Section, &SkeletonAbbrevs); +// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name, +// DW_AT_addr_base. 
+DwarfTypeUnit *DwarfDebug::constructSkeletonTU(DwarfTypeUnit *TU) { + DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>( + *SkeletonHolder.getUnits()[TU->getCU().getUniqueID()]); + + DIE *Die = new DIE(dwarf::DW_TAG_type_unit); + DwarfTypeUnit *NewTU = + new DwarfTypeUnit(TU->getUniqueID(), Die, CU, Asm, this, &SkeletonHolder); + NewTU->setTypeSignature(TU->getTypeSignature()); + NewTU->setType(NULL); + NewTU->initSection( + Asm->getObjFileLowering().getDwarfTypesSection(TU->getTypeSignature())); + + initSkeletonUnit(TU, Die, NewTU); + return NewTU; } // Emit the .debug_info.dwo section for separated dwarf. This contains the // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); - InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(), - Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), - DwarfAbbrevDWOSectionSym); + // Don't pass an abbrev symbol, using a constant zero instead so as not to + // emit relocations into the dwo file. + InfoHolder.emitUnits(this, /* AbbrevSymbol */nullptr); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the // abbreviations for the .debug_info.dwo section. void DwarfDebug::emitDebugAbbrevDWO() { assert(useSplitDwarf() && "No split dwarf?"); - emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), - &Abbreviations); + InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection()); +} + +void DwarfDebug::emitDebugLineDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLineDWOSection()); + SplitTypeUnitFileTable.Emit(Asm->OutStreamer); } // Emit the .debug_str.dwo section for separated dwarf. This contains the @@ -3040,9 +2733,75 @@ void DwarfDebug::emitDebugAbbrevDWO() { // sections. void DwarfDebug::emitDebugStrDWO() { assert(useSplitDwarf() && "No split dwarf?"); - const MCSection *OffSec = Asm->getObjFileLowering() - .getDwarfStrOffDWOSection(); + const MCSection *OffSec = + Asm->getObjFileLowering().getDwarfStrOffDWOSection(); const MCSymbol *StrSym = DwarfStrSectionSym; InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), OffSec, StrSym); } + +MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { + if (!useSplitDwarf()) + return nullptr; + if (SingleCU) + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory()); + return &SplitTypeUnitFileTable; +} + +void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, + StringRef Identifier, DIE *RefDie, + DICompositeType CTy) { + // Flag the type unit reference as a declaration so that if it contains + // members (implicit special members, static data member definitions, member + // declarations for definitions in this CU, etc) consumers don't get confused + // and think this is a full definition. + CU.addFlag(RefDie, dwarf::DW_AT_declaration); + + const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy]; + if (TU) { + CU.addDIETypeSignature(RefDie, *TU); + return; + } + + DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit); + DwarfTypeUnit *NewTU = + new DwarfTypeUnit(InfoHolder.getUnits().size(), UnitDie, CU, Asm, this, + &InfoHolder, getDwoLineTable(CU)); + TU = NewTU; + InfoHolder.addUnit(NewTU); + + NewTU->addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + CU.getLanguage()); + + MD5 Hash; + Hash.update(Identifier); + // ... take the least significant 8 bytes and return those. 
Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + MD5::MD5Result Result; + Hash.final(Result); + uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8); + NewTU->setTypeSignature(Signature); + if (useSplitDwarf()) + NewTU->setSkeleton(constructSkeletonTU(NewTU)); + else + CU.applyStmtList(*UnitDie); + + NewTU->setType(NewTU->createTypeDIE(CTy)); + + NewTU->initSection( + useSplitDwarf() + ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) + : Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + + CU.addDIETypeSignature(RefDie, *NewTU); +} + +void DwarfDebug::attachLowHighPC(DwarfCompileUnit *Unit, DIE *D, + MCSymbol *Begin, MCSymbol *End) { + Unit->addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); + if (DwarfVersion < 4) + Unit->addLabelAddress(D, dwarf::DW_AT_high_pc, End); + else + Unit->addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index cebac39..da708f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,44 +14,44 @@ #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#include "AsmPrinterHandler.h" #include "DIE.h" +#include "DebugLocEntry.h" +#include "DebugLocList.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/CodeGen/LexicalScopes.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/DebugLoc.h" namespace llvm { -class CompileUnit; +class AsmPrinter; +class ByteStreamer; class ConstantInt; class ConstantFP; -class DbgVariable; -class MachineFrameInfo; +class DwarfCompileUnit; +class DwarfDebug; +class DwarfTypeUnit; +class DwarfUnit; class MachineModuleInfo; -class MachineOperand; -class MCAsmInfo; -class DIEAbbrev; -class DIE; -class DIEBlock; -class DIEEntry; //===----------------------------------------------------------------------===// /// \brief This class is used to record source line correspondence. class SrcLineInfo { - unsigned Line; // Source line number. - unsigned Column; // Source column. - unsigned SourceID; // Source ID number. - MCSymbol *Label; // Label in code ID number. + unsigned Line; // Source line number. + unsigned Column; // Source column. + unsigned SourceID; // Source ID number. + MCSymbol *Label; // Label in code ID number. public: SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label) - : Line(L), Column(C), SourceID(S), Label(label) {} + : Line(L), Column(C), SourceID(S), Label(label) {} // Accessors unsigned getLine() const { return Line; } @@ -60,124 +60,44 @@ public: MCSymbol *getLabel() const { return Label; } }; -/// \brief This struct describes location entries emitted in the .debug_loc -/// section. -class DotDebugLocEntry { - // Begin and end symbols for the address range that this location is valid. - const MCSymbol *Begin; - const MCSymbol *End; - - // Type of entry that this represents. 
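
// addDwarfTypeUnitType above derives the 64-bit type signature from an MD5
// hash of the type's unique identifier: the low 8 bytes of the 16-byte digest,
// read as a little-endian integer. A standalone sketch of that final step,
// assuming the digest has already been produced by some MD5 implementation
// (the hashing itself is omitted and the function name is illustrative):

#include <cstdint>

// Form the signature from digest bytes 8..15, least significant byte first.
static uint64_t typeSignatureFromDigest(const uint8_t (&Digest)[16]) {
  uint64_t Signature = 0;
  for (int i = 7; i >= 0; --i)
    Signature = (Signature << 8) | Digest[8 + i];
  return Signature;
}
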
- enum EntryType { - E_Location, - E_Integer, - E_ConstantFP, - E_ConstantInt - }; - enum EntryType EntryKind; - - union { - int64_t Int; - const ConstantFP *CFP; - const ConstantInt *CIP; - } Constants; - - // The location in the machine frame. - MachineLocation Loc; - - // The variable to which this location entry corresponds. - const MDNode *Variable; - - // Whether this location has been merged. - bool Merged; - -public: - DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) { - Constants.Int = 0; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, - const MDNode *V) - : Begin(B), End(E), Loc(L), Variable(V), Merged(false) { - Constants.Int = 0; - EntryKind = E_Location; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) - : Begin(B), End(E), Variable(0), Merged(false) { - Constants.Int = i; - EntryKind = E_Integer; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) - : Begin(B), End(E), Variable(0), Merged(false) { - Constants.CFP = FPtr; - EntryKind = E_ConstantFP; - } - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, - const ConstantInt *IPtr) - : Begin(B), End(E), Variable(0), Merged(false) { - Constants.CIP = IPtr; - EntryKind = E_ConstantInt; - } - - /// \brief Empty entries are also used as a trigger to emit temp label. Such - /// labels are referenced is used to find debug_loc offset for a given DIE. - bool isEmpty() { return Begin == 0 && End == 0; } - bool isMerged() { return Merged; } - void Merge(DotDebugLocEntry *Next) { - if (!(Begin && Loc == Next->Loc && End == Next->Begin)) - return; - Next->Begin = Begin; - Merged = true; - } - bool isLocation() const { return EntryKind == E_Location; } - bool isInt() const { return EntryKind == E_Integer; } - bool isConstantFP() const { return EntryKind == E_ConstantFP; } - bool isConstantInt() const { return EntryKind == E_ConstantInt; } - int64_t getInt() const { return Constants.Int; } - const ConstantFP *getConstantFP() const { return Constants.CFP; } - const ConstantInt *getConstantInt() const { return Constants.CIP; } - const MDNode *getVariable() const { return Variable; } - const MCSymbol *getBeginSym() const { return Begin; } - const MCSymbol *getEndSym() const { return End; } - MachineLocation getLoc() const { return Loc; } -}; - //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. - DbgVariable *AbsVar; // Corresponding Abstract variable, if any. - const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. + DbgVariable *AbsVar; // Corresponding Abstract variable, if any. + const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; DwarfDebug *DD; + public: // AbsVar may be NULL. DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) - : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0), DD(DD) {} + : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), + FrameIndex(~0), DD(DD) {} // Accessors. 
- DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } + DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } DbgVariable *getAbstractVariable() const { return AbsVar; } - const MachineInstr *getMInsn() const { return MInsn; } - void setMInsn(const MachineInstr *M) { MInsn = M; } - int getFrameIndex() const { return FrameIndex; } - void setFrameIndex(int FI) { FrameIndex = FI; } + const MachineInstr *getMInsn() const { return MInsn; } + void setMInsn(const MachineInstr *M) { MInsn = M; } + int getFrameIndex() const { return FrameIndex; } + void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. - uint16_t getTag() const { + uint16_t getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// \brief Return true if DbgVariable is artificial. - bool isArtificial() const { + bool isArtificial() const { if (Var.isArtificial()) return true; if (getType().isArtificial()) @@ -185,7 +105,7 @@ public: return false; } - bool isObjectPointer() const { + bool isObjectPointer() const { if (Var.isObjectPointer()) return true; if (getType().isObjectPointer()) @@ -193,21 +113,16 @@ public: return false; } - bool variableHasComplexAddress() const { + bool variableHasComplexAddress() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); } - bool isBlockByrefVariable() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { + bool isBlockByrefVariable() const; + unsigned getNumAddrElements() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.getNumAddrElements(); } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } + uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } DIType getType() const; private: @@ -217,43 +132,50 @@ private: }; /// \brief Collects and handles information specific to a particular -/// collection of units. -class DwarfUnits { +/// collection of units. This collection represents all of the units +/// that will be ultimately output into a single object file. +class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; // Used to uniquely define abbreviations. - FoldingSet<DIEAbbrev> *AbbreviationsSet; + FoldingSet<DIEAbbrev> AbbreviationsSet; // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> &Abbreviations; + std::vector<DIEAbbrev *> Abbreviations; // A pointer to all units in the section. - SmallVector<CompileUnit *, 1> CUs; + SmallVector<DwarfUnit *, 1> CUs; // Collection of strings for this unit and assorted symbols. // A String->Symbol mapping of strings used by indirect // references. 
- typedef StringMap<std::pair<MCSymbol*, unsigned>, - BumpPtrAllocator&> StrPool; + typedef StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> + StrPool; StrPool StringPool; unsigned NextStringPoolNumber; std::string StringPref; + struct AddressPoolEntry { + unsigned Number; + bool TLS; + AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {} + }; // Collection of addresses for this unit and assorted labels. // A Symbol->unsigned mapping of addresses used by indirect // references. - typedef DenseMap<const MCExpr *, unsigned> AddrPool; + typedef DenseMap<const MCSymbol *, AddressPoolEntry> AddrPool; AddrPool AddressPool; unsigned NextAddrPoolNumber; public: - DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS, - std::vector<DIEAbbrev *> &A, const char *Pref, - BumpPtrAllocator &DA) - : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA), - NextStringPoolNumber(0), StringPref(Pref), AddressPool(), - NextAddrPoolNumber(0) {} + DwarfFile(AsmPrinter *AP, const char *Pref, BumpPtrAllocator &DA) + : Asm(AP), StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), + AddressPool(), NextAddrPoolNumber(0) {} + + ~DwarfFile(); + + const SmallVectorImpl<DwarfUnit *> &getUnits() { return CUs; } /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -265,12 +187,14 @@ public: void assignAbbrevNumber(DIEAbbrev &Abbrev); /// \brief Add a unit to the list of CUs. - void addUnit(CompileUnit *CU) { CUs.push_back(CU); } + void addUnit(DwarfUnit *CU) { CUs.push_back(CU); } /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(DwarfDebug *DD, const MCSection *USection, - const MCSection *ASection, const MCSymbol *ASectionSym); + void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym); + + /// \brief Emit a set of abbreviations to the specific section. + void emitAbbrevs(const MCSection *); /// \brief Emit all of the strings to the section given. void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection, @@ -295,8 +219,7 @@ public: /// \brief Returns the index into the address pool with the given /// label/symbol. - unsigned getAddrPoolIndex(const MCExpr *Sym); - unsigned getAddrPoolIndex(const MCSymbol *Sym); + unsigned getAddrPoolIndex(const MCSymbol *Sym, bool TLS = false); /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } @@ -304,13 +227,13 @@ public: /// \brief Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { - SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} const MCSymbol *Sym; - CompileUnit *CU; + DwarfCompileUnit *CU; }; /// \brief Collects and handles dwarf debug information. -class DwarfDebug { +class DwarfDebug : public AsmPrinterHandler { // Target of Dwarf emission. AsmPrinter *Asm; @@ -320,40 +243,31 @@ class DwarfDebug { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // Handle to the a compile unit used for the inline extension handling. - CompileUnit *FirstCU; + // Handle to the compile unit used for the inline extension handling, + // this is just so that the DIEValue allocator has a place to store + // the particular elements. + // FIXME: Store these off of DwarfDebug instead? + DwarfCompileUnit *FirstCU; - // Maps MDNode with its corresponding CompileUnit. 
- DenseMap <const MDNode *, CompileUnit *> CUMap; + // Maps MDNode with its corresponding DwarfCompileUnit. + MapVector<const MDNode *, DwarfCompileUnit *> CUMap; - // Maps subprogram MDNode with its corresponding CompileUnit. - DenseMap <const MDNode *, CompileUnit *> SPMap; + // Maps subprogram MDNode with its corresponding DwarfCompileUnit. + DenseMap<const MDNode *, DwarfCompileUnit *> SPMap; - // Maps a CU DIE with its corresponding CompileUnit. - DenseMap <const DIE *, CompileUnit *> CUDieMap; + // Maps a CU DIE with its corresponding DwarfCompileUnit. + DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; - /// Maps MDNodes for type sysstem with the corresponding DIEs. These DIEs can + /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead - /// of in CompileUnit. + /// of in DwarfCompileUnit. DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; - // Used to uniquely define abbreviations. - FoldingSet<DIEAbbrev> AbbreviationsSet; - - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> Abbreviations; - - // Stores the current file ID for a given compile unit. - DenseMap <unsigned, unsigned> FileIDCUMap; - // Source id map, i.e. CUID, source filename and directory, - // separated by a zero byte, mapped to a unique id. - StringMap<unsigned, BumpPtrAllocator&> SourceIdMap; - // List of all labels used in aranges generation. std::vector<SymbolCU> ArangeLabels; // Size of each symbol emitted (for those symbols that have a specific size). - DenseMap <const MCSymbol *, uint64_t> SymSize; + DenseMap<const MCSymbol *, uint64_t> SymSize; // Provides a unique id per text section. typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; @@ -368,15 +282,16 @@ class DwarfDebug { DenseMap<const MDNode *, DIE *> AbstractSPDies; // Collection of dbg variables of a scope. - typedef DenseMap<LexicalScope *, - SmallVector<DbgVariable *, 8> > ScopeVariablesMap; + typedef DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > + ScopeVariablesMap; ScopeVariablesMap ScopeVariables; // Collection of abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; - // Collection of DotDebugLocEntry. - SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; + // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists + // can refer to them in spite of insertions into this list. + SmallVector<DebugLocList, 4> DotDebugLocEntries; // Collection of subprogram DIEs that are marked (at the end of the module) // as DW_AT_inline. @@ -394,17 +309,15 @@ class DwarfDebug { // Every user variable mentioned by a DBG_VALUE instruction in order of // appearance. - SmallVector<const MDNode*, 8> UserVariables; + SmallVector<const MDNode *, 8> UserVariables; // For each user variable, keep a list of DBG_VALUE instructions in order. // The list can also contain normal instructions that clobber the previous // DBG_VALUE. - typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> > - DbgValueHistoryMap; + typedef DenseMap<const MDNode *, SmallVector<const MachineInstr *, 4> > + DbgValueHistoryMap; DbgValueHistoryMap DbgValues; - SmallVector<const MCSymbol *, 8> DebugRangeSymbols; - // Previous instruction's location information. This is used to determine // label location to indicate scope boundries in dwarf debug info. DebugLoc PrevInstLoc; @@ -414,6 +327,19 @@ class DwarfDebug { // body. 
DebugLoc PrologEndLoc; + // If nonnull, stores the current machine function we're processing. + const MachineFunction *CurFn; + + // If nonnull, stores the current machine instruction we're processing. + const MachineInstr *CurMI; + + // If nonnull, stores the section that the previous function was allocated to + // emitting. + const MCSection *PrevSection; + + // If nonnull, stores the CU in which the previous subprogram was contained. + const DwarfCompileUnit *PrevCU; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -421,36 +347,48 @@ class DwarfDebug { MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; + MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; + MCSymbol *DwarfStrDWOSectionSym; MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Counter for assigning globally unique IDs for CUs. - unsigned GlobalCUIndexCount; + // Counter for assigning globally unique IDs for ranges. + unsigned GlobalRangeCount; // Holder for the file specific debug information. - DwarfUnits InfoHolder; + DwarfFile InfoHolder; // Holders for the various debug information flags that we might need to // have exposed. See accessor functions below for description. // Holder for imported entities. typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> - ImportedEntityMap; + ImportedEntityMap; ImportedEntityMap ScopesWithImportedEntities; - // Holder for types that are going to be extracted out into a type unit. - std::vector<DIE *> TypeUnits; + // Map from MDNodes for user-defined types to the type units that describe + // them. + DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; // Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; + // Whether or not to use AT_ranges for compilation units. + bool HasCURanges; + + // Whether we emitted a function into a section other than the default + // text. + bool UsedNonDefaultText; + // Version of dwarf we're emitting. unsigned DwarfVersion; + // Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; + // DWARF5 Experimental Options bool HasDwarfAccelTables; bool HasSplitDwarf; @@ -460,25 +398,27 @@ class DwarfDebug { // original object file, rather than things that are meant // to be in the .dwo sections. - // The CUs left in the original object file for separated debug info. - SmallVector<CompileUnit *, 1> SkeletonCUs; - - // Used to uniquely define abbreviations for the skeleton emission. - FoldingSet<DIEAbbrev> SkeletonAbbrevSet; - - // A list of all the unique abbreviations in use. - std::vector<DIEAbbrev *> SkeletonAbbrevs; - // Holder for the skeleton information. - DwarfUnits SkeletonHolder; + DwarfFile SkeletonHolder; - // Maps from a type identifier to the actual MDNode. - DITypeIdentifierMap TypeIdentifierMap; + /// Store file names for type units under fission in a line table header that + /// will be emitted into debug_line.dwo. 
+ // FIXME: replace this with a map from comp_dir to table so that we can emit + // multiple tables during LTO each of which uses directory 0, referencing the + // comp_dir of all the type units that use it. + MCDwarfDwoLineTable SplitTypeUnitFileTable; -private: + // True iff there are multiple CUs in this module. + bool SingleCU; + + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + const SmallVectorImpl<DwarfUnit *> &getUnits() { + return InfoHolder.getUnits(); + } + /// \brief Find abstract variable associated with Var. DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); @@ -486,24 +426,30 @@ private: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); + DIE *updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, DISubprogram SP); + + /// \brief A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// \brief A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE, + const SmallVectorImpl<InsnRange> &Range); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. - DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// A helper function to check whether the DIE for a given Scope is going - /// to be null. - bool isLexicalScopeDIENull(LexicalScope *Scope); + DIE *constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. - DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + DIE *constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); /// \brief Construct a DIE for this scope. - DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + DIE *constructScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl<DIE*> &Children); + DIE *createScopeChildrenDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl<DIE *> &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -528,9 +474,6 @@ private: /// open. void endSections(); - /// \brief Emit a set of abbreviations to the specific section. - void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *); - /// \brief Emit the debug info section. void emitDebugInfo(); @@ -566,32 +509,41 @@ private: /// index. void emitDebugPubTypes(bool GnuStyle = false); + void + emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name, + const StringMap<const DIE *> &(DwarfUnit::*Accessor)() + const); + /// \brief Emit visible names into a debug str section. void emitDebugStr(); /// \brief Emit visible names into a debug loc section. void emitDebugLoc(); + /// \brief Emit visible names into a debug loc dwo section. + void emitDebugLocDWO(); + /// \brief Emit visible names into a debug aranges section. void emitDebugARanges(); /// \brief Emit visible names into a debug ranges section. 
void emitDebugRanges(); - /// \brief Emit visible names into a debug macinfo section. - void emitDebugMacInfo(); - /// \brief Emit inline info using custom format. void emitDebugInlineInfo(); /// DWARF 5 Experimental Split Dwarf Emitters + /// \brief Initialize common features of skeleton units. + void initSkeletonUnit(const DwarfUnit *U, DIE *Die, DwarfUnit *NewU); + /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const CompileUnit *CU); + DwarfCompileUnit *constructSkeletonCU(const DwarfCompileUnit *CU); - /// \brief Emit the local split abbreviations. - void emitSkeletonAbbrevs(const MCSection *); + /// \brief Construct the split debug info compile unit for the debug info + /// section. + DwarfTypeUnit *constructSkeletonTU(DwarfTypeUnit *TU); /// \brief Emit the debug info dwo section. void emitDebugInfoDWO(); @@ -599,27 +551,33 @@ private: /// \brief Emit the debug abbrev dwo section. void emitDebugAbbrevDWO(); + /// \brief Emit the debug line dwo section. + void emitDebugLineDWO(); + /// \brief Emit the debug str dwo section. void emitDebugStrDWO(); - /// \brief Create new CompileUnit for the given metadata node with tag + /// Flags to let the linker know we have emitted new style pubnames. Only + /// emit it here if we don't have a skeleton CU for split dwarf. + void addGnuPubAttributes(DwarfUnit *U, DIE *D) const; + + /// \brief Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(DICompileUnit DIUnit); + DwarfCompileUnit *constructDwarfCompileUnit(DICompileUnit DIUnit); /// \brief Construct subprogram DIE. - void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); + void constructSubprogramDIE(DwarfCompileUnit *TheCU, const MDNode *N); /// \brief Construct imported_module or imported_declaration DIE. - void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N); + void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N); /// \brief Construct import_module DIE. - void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, + void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N, DIE *Context); /// \brief Construct import_module DIE. - void constructImportedEntityDIE(CompileUnit *TheCU, - const DIImportedEntity &Module, - DIE *Context); + void constructImportedEntityDIE(DwarfCompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -633,21 +591,18 @@ private: /// \brief If Var is an current function argument that add it in /// CurrentFnArguments list. - bool addCurrentFnArgument(const MachineFunction *MF, - DbgVariable *Var, LexicalScope *Scope); + bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope); /// \brief Populate LexicalScope entries with variables' info. - void collectVariableInfo(const MachineFunction *, - SmallPtrSet<const MDNode *, 16> &ProcessedVars); + void collectVariableInfo(SmallPtrSet<const MDNode *, 16> &ProcessedVars); /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(const MachineFunction * MF, - SmallPtrSet<const MDNode *, 16> &P); + void collectVariableInfoFromMMITable(SmallPtrSet<const MDNode *, 16> &P); /// \brief Ensure that a label will be emitted before MI. 
void requestLabelBeforeInsn(const MachineInstr *MI) { - LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol *)0)); } /// \brief Return Label preceding the instruction. @@ -655,12 +610,15 @@ private: /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { - LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol *)0)); } /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + void attachLowHighPC(DwarfCompileUnit *Unit, DIE *D, MCSymbol *Begin, + MCSymbol *End); + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -679,62 +637,89 @@ public: void beginModule(); /// \brief Emit all Dwarf sections that should come after the content. - void endModule(); + void endModule() override; /// \brief Gather pre-function debug information. - void beginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; /// \brief Gather and emit post-function debug information. - void endFunction(const MachineFunction *MF); + void endFunction(const MachineFunction *MF) override; /// \brief Process beginning of an instruction. - void beginInstruction(const MachineInstr *MI); + void beginInstruction(const MachineInstr *MI) override; /// \brief Process end of an instruction. - void endInstruction(const MachineInstr *MI); + void endInstruction() override; /// \brief Add a DIE to the set of types that we're going to pull into /// type units. - void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, + DIE *Die, DICompositeType CTy); /// \brief Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } /// \brief For symbols that have a size designated (e.g. common symbols), /// this tracks that size. - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;} - - /// \brief Look up the source id with the given directory and source file - /// names. If none currently exists, create a new id and insert it in the - /// SourceIds map. - unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName, - unsigned CUID); + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { + SymSize[Sym] = Size; + } /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs); + void emitDIE(DIE *Die); // Experimental DWARF5 features. /// \brief Returns whether or not to emit tables that dwarf consumers can /// use to accelerate lookup. - bool useDwarfAccelTables() { return HasDwarfAccelTables; } + bool useDwarfAccelTables() const { return HasDwarfAccelTables; } /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. - bool useSplitDwarf() { return HasSplitDwarf; } + bool useSplitDwarf() const { return HasSplitDwarf; } /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } + /// Returns the section symbol for the .debug_loc section. + MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } + + /// Returns the previous section that was emitted into. 
+ const MCSection *getPrevSection() const { return PrevSection; } + + /// Returns the previous CU that was being updated + const DwarfCompileUnit *getPrevCU() const { return PrevCU; } + + /// Returns the entries for the .debug_loc section. + const SmallVectorImpl<DebugLocList> & + getDebugLocEntries() const { + return DotDebugLocEntries; + } + + /// \brief Emit an entry for the debug loc section. This can be used to + /// handle an entry that's going to be emitted into the debug loc section. + void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); + + /// Emit the location for a debug loc entry, including the size header. + void emitDebugLocEntryLocation(const DebugLocEntry &Entry); + /// Find the MDNode for the given reference. template <typename T> T resolve(DIRef<T> Ref) const { return Ref.resolve(TypeIdentifierMap); } + /// \brief Return the TypeIdentifierMap. + const DITypeIdentifierMap &getTypeIdentifierMap() const { + return TypeIdentifierMap; + } + + /// Find the DwarfCompileUnit for the given CU Die. + DwarfCompileUnit *lookupUnit(const DIE *CU) const { + return CUDieMap.lookup(CU); + } /// isSubprogramContext - Return true if Context is either a subprogram /// or another context nested inside a subprogram. bool isSubprogramContext(const MDNode *Context); - }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 7133458..113a9e4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -30,7 +31,7 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Support/LEB128.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -57,19 +58,6 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, return Count; } -/// PadLT - Order landing pads lexicographically by type id. -bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { - const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? LSize : RSize; - - for (unsigned i = 0; i != MinSize; ++i) - if (LIds[i] != RIds[i]) - return LIds[i] < RIds[i]; - - return LSize < RSize; -} - /// ComputeActionsTable - Compute the actions table and gather the first action /// index for each landing pad site. 
unsigned DwarfException:: @@ -108,7 +96,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { FilterOffsets.push_back(Offset); - Offset -= MCAsmInfo::getULEB128Size(*I); + Offset -= getULEB128Size(*I); } FirstActions.reserve(LandingPads.size()); @@ -132,14 +120,12 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); PrevAction = Actions.size() - 1; - SizeAction = - MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) + - MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) + + getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); - SizeAction -= - MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID); SizeAction += -Actions[PrevAction].NextAction; PrevAction = Actions[PrevAction].Previous; } @@ -150,10 +136,10 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); + unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); + SizeAction = SizeTypeID + getSLEB128Size(NextAction); SizeSiteActions += SizeAction; ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; @@ -242,7 +228,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, I != E; ++I) { for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { - if (!MI->isLabel()) { + if (!MI->isEHLabel()) { if (MI->isCall()) SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); continue; @@ -357,7 +343,10 @@ void DwarfException::EmitExceptionTable() { for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + // Order landing pads lexicographically by type id. + std::sort(LandingPads.begin(), LandingPads.end(), + [](const LandingPadInfo *L, + const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); // Compute the actions table and gather the first action index for each // landing pad site. @@ -401,9 +390,9 @@ void DwarfException::EmitExceptionTable() { } for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { - CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action); + CallSiteTableLength += getULEB128Size(CallSites[i].Action); if (IsSJLJ) - CallSiteTableLength += MCAsmInfo::getULEB128Size(i); + CallSiteTableLength += getULEB128Size(i); } // Type infos. @@ -488,15 +477,14 @@ void DwarfException::EmitExceptionTable() { // We chose another solution: don't output padding inside the table like GCC // does, instead output it before the table. 
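
// The action and call-site table bookkeeping above now uses the free
// functions getULEB128Size/getSLEB128Size from llvm/Support/LEB128.h instead
// of the old MCAsmInfo static helpers. A minimal standalone sketch of how
// those encoded sizes are computed; the names below only mirror the idea and
// are not the LLVM functions themselves:

#include <cstdint>

// Bytes needed to encode Value as unsigned LEB128 (7 payload bits per byte).
static unsigned ulebSize(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;
    ++Size;
  } while (Value != 0);
  return Size;
}

// Bytes needed to encode Value as signed LEB128. Encoding stops once the
// remaining value is all sign bits and the last byte's bit 6 matches the sign.
static unsigned slebSize(int64_t Value) {
  unsigned Size = 0;
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // Arithmetic shift on common implementations.
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    ++Size;
  } while (More);
  return Size;
}
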
unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; - unsigned CallSiteTableLengthSize = - MCAsmInfo::getULEB128Size(CallSiteTableLength); + unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); unsigned TTypeBaseOffset = sizeof(int8_t) + // Call site format CallSiteTableLengthSize + // Call site table length size CallSiteTableLength + // Call site table length SizeActions + // Actions size SizeTypes; - unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset); + unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); unsigned TotalSize = sizeof(int8_t) + // LPStart format sizeof(int8_t) + // TType format @@ -717,20 +705,19 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { } } -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void DwarfException::EndModule() { +void DwarfException::endModule() { llvm_unreachable("Should be implemented"); } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void DwarfException::BeginFunction(const MachineFunction *MF) { +void DwarfException::beginFunction(const MachineFunction *MF) { llvm_unreachable("Should be implemented"); } -/// EndFunction - Gather and emit post-function exception information. -/// -void DwarfException::EndFunction() { +/// endFunction - Gather and emit post-function exception information. +void DwarfException::endFunction(const MachineFunction *) { llvm_unreachable("Should be implemented"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 1575161..f792482 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/AsmPrinter.h" #include <vector> @@ -35,7 +36,7 @@ class AsmPrinter; //===----------------------------------------------------------------------===// /// DwarfException - Emits Dwarf exception handling directives. /// -class DwarfException { +class DwarfException : public AsmPrinterHandler { protected: /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -47,9 +48,6 @@ protected: static unsigned SharedTypeIds(const LandingPadInfo *L, const LandingPadInfo *R); - /// PadLT - Order landing pads lexicographically by type id. - static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R); - /// PadRange - Structure holding a try-range and the associated landing pad. struct PadRange { // The index of the landing pad. @@ -130,16 +128,21 @@ public: DwarfException(AsmPrinter *A); virtual ~DwarfException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. 
- virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; + + /// endFunction - Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + // We don't need these. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} }; class DwarfCFIException : public DwarfException { @@ -164,22 +167,26 @@ public: DwarfCFIException(AsmPrinter *A); virtual ~DwarfCFIException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; }; class ARMException : public DwarfException { - void EmitTypeInfos(unsigned TTypeEncoding); + void EmitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); + /// shouldEmitCFI - Per-function flag to indicate if frame CFI info + /// should be emitted. + bool shouldEmitCFI; + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -187,16 +194,16 @@ public: ARMException(AsmPrinter *A); virtual ~ARMException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; }; class Win64Exception : public DwarfException { @@ -219,16 +226,16 @@ public: Win64Exception(AsmPrinter *A); virtual ~Win64Exception(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + void endModule() override; - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + void beginFunction(const MachineFunction *MF) override; - /// EndFunction - Gather and emit post-function exception information. 
- virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index a6ff953..82e9bb0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===// // // The LLVM Compiler Infrastructure // @@ -13,50 +13,75 @@ #define DEBUG_TYPE "dwarfdebug" -#include "DwarfCompileUnit.h" +#include "DwarfUnit.h" #include "DwarfAccelTable.h" #include "DwarfDebug.h" #include "llvm/ADT/APFloat.h" -#include "llvm/DIBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -/// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node, - AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) - : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0), DebugInfoOffset(0) { +static cl::opt<bool> +GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, + cl::desc("Generate DWARF4 type units."), + cl::init(false)); + +/// Unit - Unit constructor. +DwarfUnit::DwarfUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU) + : UniqueID(UID), CUNode(Node), UnitDie(D), DebugInfoOffset(0), Asm(A), + DD(DW), DU(DWU), IndexTyDie(0), Section(0), Skeleton(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(UID, D, Node, A, DW, DWU) { insertDIE(Node, D); } -/// ~CompileUnit - Destructor for compile unit. -CompileUnit::~CompileUnit() { +DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU, + AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable) + : DwarfUnit(UID, D, CU.getCUNode(), A, DW, DWU), CU(CU), + SplitLineTable(SplitLineTable) { + if (SplitLineTable) + addSectionOffset(UnitDie.get(), dwarf::DW_AT_stmt_list, 0); +} + +/// ~Unit - Destructor for compile unit. +DwarfUnit::~DwarfUnit() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); + for (unsigned j = 0, M = DIELocs.size(); j < M; ++j) + DIELocs[j]->~DIELoc(); } /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug /// information entry. -DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { +DIEEntry *DwarfUnit::createDIEEntry(DIE *Entry) { DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); return Value; } /// getDefaultLowerBound - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. 
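The EndModule/BeginFunction to endModule/beginFunction renames in these hunks also add override, so any handler whose signature drifts from the base interface becomes a compile error instead of a silently shadowing overload. A simplified, hypothetical handler interface in that spirit (not LLVM's actual AsmPrinterHandler):

#include <cstdio>

// A cut-down, made-up handler interface.
struct Handler {
  virtual ~Handler() {}
  virtual void beginFunction(int FnId) = 0;
  virtual void endFunction(int FnId) = 0;
};

struct PrintingHandler : Handler {
  // 'override' makes the compiler verify these really match the base class.
  // If the base signature later changes (say, gains a parameter), these
  // declarations become hard errors instead of dead overloads.
  void beginFunction(int FnId) override { std::printf("begin %d\n", FnId); }
  void endFunction(int FnId) override { std::printf("end %d\n", FnId); }
};

int main() {
  PrintingHandler H;
  Handler &Base = H;
  Base.beginFunction(1);
  Base.endFunction(1);
}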
-int64_t CompileUnit::getDefaultLowerBound() const { +int64_t DwarfUnit::getDefaultLowerBound() const { switch (getLanguage()) { default: break; @@ -100,17 +125,23 @@ int64_t CompileUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. static bool isShareableAcrossCUs(DIDescriptor D) { - // When the MDNode can be part of the type system, the DIE can be - // shared across CUs. - return D.isType() || - (D.isSubprogram() && !DISubprogram(D).isDefinition()); + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. + // Combining type units and cross-CU DIE sharing is lower value (since + // cross-CU DIE sharing is used in LTO and removes type redundancy at that + // level already) but may be implementable for some value in projects + // building multiple independent libraries with LTO and then linking those + // together. + return (D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + !GenerateDwarfTypeUnits; } /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. We delegate the request to DwarfDebug /// when the DIE for this MDNode can be shared across CUs. The mappings /// will be kept in DwarfDebug for shareable DIEs. -DIE *CompileUnit::getDIE(DIDescriptor D) const { +DIE *DwarfUnit::getDIE(DIDescriptor D) const { if (isShareableAcrossCUs(D)) return DD->getDIE(D); return MDNodeToDieMap.lookup(D); @@ -119,7 +150,7 @@ DIE *CompileUnit::getDIE(DIDescriptor D) const { /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug /// when the DIE for this MDNode can be shared across CUs. The mappings /// will be kept in DwarfDebug for shareable DIEs. -void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { +void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { if (isShareableAcrossCUs(Desc)) { DD->insertDIE(Desc, D); return; @@ -128,7 +159,7 @@ void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { } /// addFlag - Add a flag that is true. -void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { +void DwarfUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { if (DD->getDwarfVersion() >= 4) Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); else @@ -137,8 +168,8 @@ void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { /// addUInt - Add an unsigned integer attribute data and value. /// -void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, uint64_t Integer) { +void DwarfUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) @@ -146,22 +177,22 @@ void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, Die->addValue(Attribute, *Form, Value); } -void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { +void DwarfUnit::addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer) { addUInt(Block, (dwarf::Attribute)0, Form, Integer); } /// addSInt - Add an signed integer attribute data and value. 
/// -void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, - Optional<dwarf::Form> Form, int64_t Integer) { +void DwarfUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, *Form, Value); } -void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, - int64_t Integer) { +void DwarfUnit::addSInt(DIELoc *Die, Optional<dwarf::Form> Form, + int64_t Integer) { addSInt(Die, (dwarf::Attribute)0, Form, Integer); } @@ -170,91 +201,135 @@ void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, /// more predictable sizes. In the case of split dwarf we emit an index /// into another table which gets us the static offset into the string /// table. -void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute, - StringRef String) { - DIEValue *Value; - dwarf::Form Form; - if (!DD->useSplitDwarf()) { - MCSymbol *Symb = DU->getStringPoolEntry(String); - if (Asm->needsRelocationsForDwarfStringPool()) - Value = new (DIEValueAllocator) DIELabel(Symb); - else { - MCSymbol *StringPool = DU->getStringPoolSym(); - Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); - } - Form = dwarf::DW_FORM_strp; - } else { - unsigned idx = DU->getStringPoolIndex(String); - Value = new (DIEValueAllocator) DIEInteger(idx); - Form = dwarf::DW_FORM_GNU_str_index; - } +void DwarfUnit::addString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { + + if (!DD->useSplitDwarf()) + return addLocalString(Die, Attribute, String); + + unsigned idx = DU->getStringPoolIndex(String); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); - Die->addValue(Attribute, Form, Str); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed /// to be in the local string pool instead of indirected. -void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, - StringRef String) { +void DwarfUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { MCSymbol *Symb = DU->getStringPoolEntry(String); DIEValue *Value; - if (Asm->needsRelocationsForDwarfStringPool()) + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) Value = new (DIEValueAllocator) DIELabel(Symb); else { MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, dwarf::DW_FORM_strp, Str); } /// addExpr - Add a Dwarf expression attribute data and value. /// -void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { +void DwarfUnit::addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr) { DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); Die->addValue((dwarf::Attribute)0, Form, Value); } +/// addLocationList - Add a Dwarf loclistptr attribute data and value. +/// +void DwarfUnit::addLocationList(DIE *Die, dwarf::Attribute Attribute, + unsigned Index) { + DIEValue *Value = new (DIEValueAllocator) DIELocList(Index); + dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4; + Die->addValue(Attribute, Form, Value); +} + /// addLabel - Add a Dwarf label attribute data and value. 
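The string handling above boils down to two cases: a non-split unit refers into .debug_str with DW_FORM_strp, while a split unit stores only a small DW_FORM_GNU_str_index into a pool that is laid out later. A toy interning pool sketching that second case (hypothetical types, not LLVM's DwarfFile string pool):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Every distinct string gets a stable index; the byte offset of each entry
// can be computed once the pool is laid out as NUL-terminated strings.
class StringPool {
  std::map<std::string, unsigned> Index;
  std::vector<std::string> Entries;
public:
  unsigned getIndex(const std::string &S) {
    auto It = Index.find(S);
    if (It != Index.end())
      return It->second;
    unsigned Id = static_cast<unsigned>(Entries.size());
    Index[S] = Id;
    Entries.push_back(S);
    return Id;
  }
  uint64_t getOffset(unsigned Id) const {
    uint64_t Off = 0;
    for (unsigned I = 0; I < Id; ++I)
      Off += Entries[I].size() + 1; // string plus NUL terminator
    return Off;
  }
};

int main() {
  StringPool Pool;
  unsigned A = Pool.getIndex("main");
  unsigned B = Pool.getIndex("int");
  unsigned C = Pool.getIndex("main"); // deduplicated
  std::printf("%u %u %u offset(int)=%llu\n", A, B, C,
              (unsigned long long)Pool.getOffset(B));
  // prints: 0 1 0 offset(int)=5
}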
/// -void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, - dwarf::Form Form, const MCSymbol *Label) { +void DwarfUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, + const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); Die->addValue(Attribute, Form, Value); } -void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form, - const MCSymbol *Label) { +void DwarfUnit::addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label) { addLabel(Die, (dwarf::Attribute)0, Form, Label); } +/// addSectionLabel - Add a Dwarf section label attribute data and value. +/// +void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (DD->getDwarfVersion() >= 4) + addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); + else + addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); +} + +/// addSectionOffset - Add an offset into a section attribute data and value. +/// +void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, - MCSymbol *Label) { +void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + + if (!DD->useSplitDwarf()) + return addLocalLabelAddress(Die, Attribute, Label); + if (Label) DD->addArangeLabel(SymbolCU(this, Label)); - if (!DD->useSplitDwarf()) { - if (Label != NULL) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); - } else { - DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); - Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); - } + unsigned idx = DU->getAddrPoolIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); +} + +void DwarfCompileUnit::addLocalLabelAddress(DIE *Die, + dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + if (Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); } else { - unsigned idx = DU->getAddrPoolIndex(Label); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); } } +unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { + // If we print assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + return Asm->OutStreamer.EmitDwarfFileDirective( + 0, DirName, FileName, + Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { + return SplitLineTable ? 
SplitLineTable->getFile(DirName, FileName) + : getCU().getOrCreateSourceID(FileName, DirName); +} + /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { - DD->addArangeLabel(SymbolCU(this, Sym)); +void DwarfUnit::addOpAddress(DIELoc *Die, const MCSymbol *Sym) { if (!DD->useSplitDwarf()) { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Die, dwarf::DW_FORM_udata, Sym); @@ -264,31 +339,43 @@ void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { } } -/// addDelta - Add a label delta attribute data and value. +/// addSectionDelta - Add a section label delta attribute data and value. /// -void CompileUnit::addDelta(DIE *Die, dwarf::Attribute Attribute, - dwarf::Form Form, const MCSymbol *Hi, - const MCSymbol *Lo) { +void DwarfUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); + else + Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); +} + +void DwarfUnit::addLabelDelta(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, - DIE *Entry) { +void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { addDIEEntry(Die, Attribute, createDIEEntry(Entry)); } -void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, - DIEEntry *Entry) { - const DIE *DieCU = Die->getCompileUnitOrNull(); - const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull(); +void DwarfUnit::addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type) { + Die->addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, + new (DIEValueAllocator) DIETypeSignature(Type)); +} + +void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die->getUnitOrNull(); + const DIE *EntryCU = Entry->getEntry()->getUnitOrNull(); if (!DieCU) // We assume that Die belongs to this CU, if it is not linked to any CU yet. - DieCU = getCUDie(); + DieCU = getUnitDie(); if (!EntryCU) - EntryCU = getCUDie(); + EntryCU = getUnitDie(); Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, Entry); @@ -296,7 +383,7 @@ void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. -DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { +DIE *DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { DIE *Die = new DIE(Tag); Parent.addChild(Die); if (N) @@ -306,8 +393,14 @@ DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { /// addBlock - Add block data. /// -void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, - DIEBlock *Block) { +void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Loc) { + Loc->ComputeSize(Asm); + DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. 
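Several of the new helpers above (addSectionLabel, addSectionOffset, addSectionDelta) pick DW_FORM_sec_offset when emitting DWARF 4 or newer and fall back to DW_FORM_data4 for older consumers. A minimal sketch of that decision, using the real DWARF form codes:

#include <cstdio>

// Real DWARF form codes (DWARF v4, section 7.5.4).
enum DwarfForm : unsigned {
  DW_FORM_data4 = 0x06,
  DW_FORM_sec_offset = 0x17
};

// DWARF 4 introduced DW_FORM_sec_offset for section-relative values; earlier
// versions encoded the same thing as a plain 4-byte datum.
static DwarfForm sectionOffsetForm(unsigned DwarfVersion) {
  return DwarfVersion >= 4 ? DW_FORM_sec_offset : DW_FORM_data4;
}

int main() {
  std::printf("v2 -> 0x%02x, v4 -> 0x%02x\n",
              sectionOffsetForm(2), sectionOffsetForm(4));
}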
+ Die->addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc); +} + +void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, + DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. Die->addValue(Attribute, Block->BestForm(), Block); @@ -315,17 +408,12 @@ void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. - if (!V.isVariable()) - return; - - unsigned Line = V.getLineNumber(); +void DwarfUnit::addSourceLine(DIE *Die, unsigned Line, StringRef File, + StringRef Directory) { if (Line == 0) return; - unsigned FileID = - DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory(), getUniqueID()); + + unsigned FileID = getOrCreateSourceID(File, Directory); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); addUInt(Die, dwarf::DW_AT_decl_line, None, Line); @@ -333,98 +421,59 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.isGlobalVariable()) - return; +void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) { + assert(V.isVariable()); - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = - DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(), + V.getContext().getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.isSubprogram()) - return; +void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + assert(G.isGlobalVariable()); - // If the line number is 0, don't add it. - unsigned Line = SP.getLineNumber(); - if (Line == 0) - return; + addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory()); +} - unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), - getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) { + assert(SP.isSubprogram()); + + addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. 
- if (!Ty.isType()) - return; +void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) { + assert(Ty.isType()); - unsigned Line = Ty.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), - getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { - // Verify type. - if (!Ty.isObjCProperty()) - return; +void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + assert(Ty.isObjCProperty()); - unsigned Line = Ty.getLineNumber(); - if (Line == 0) - return; DIFile File = Ty.getFile(); - unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), - File.getDirectory(), getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, Ty.getLineNumber(), File.getFilename(), + File.getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. -void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; - - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); +void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) { + assert(NS.Verify()); - unsigned FileID = - DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, None, Line); + addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); } /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, - MachineLocation Location) { +void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location) { if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else if (DV.isBlockByrefVariable()) @@ -435,20 +484,53 @@ void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, } /// addRegisterOp - Add register operand. -void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { +void DwarfUnit::addRegisterOp(DIELoc *TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned DWReg = RI->getDwarfRegNum(Reg, false); + int DWReg = RI->getDwarfRegNum(Reg, false); + bool isSubRegister = DWReg < 0; + + unsigned Idx = 0; + + // Go up the super-register chain until we hit a valid dwarf register number. 
+ for (MCSuperRegIterator SR(Reg, RI); SR.isValid() && DWReg < 0; ++SR) { + DWReg = RI->getDwarfRegNum(*SR, false); + if (DWReg >= 0) + Idx = RI->getSubRegIndex(*SR, Reg); + } + + if (DWReg < 0) { + DEBUG(dbgs() << "Invalid Dwarf register number.\n"); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_nop); + return; + } + + // Emit register if (DWReg < 32) addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } + + // Emit Mask + if (isSubRegister) { + unsigned Size = RI->getSubRegIdxSize(Idx); + unsigned Offset = RI->getSubRegIdxOffset(Idx); + if (Offset > 0) { + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); + addUInt(TheDie, dwarf::DW_FORM_data1, Size); + addUInt(TheDie, dwarf::DW_FORM_data1, Offset); + } else { + unsigned ByteSize = Size / 8; // Assuming 8 bits per byte. + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece); + addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize); + } + } } /// addRegisterOffset - Add register offset. -void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, - int64_t Offset) { +void DwarfUnit::addRegisterOffset(DIELoc *TheDie, unsigned Reg, + int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); @@ -466,59 +548,59 @@ void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, /// addAddress - Add an address attribute to a die based on the location /// provided. -void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); +void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg() && !Indirect) - addRegisterOp(Block, Location.getReg()); + addRegisterOp(Loc, Location.getReg()); else { - addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + addRegisterOffset(Loc, Location.getReg(), Location.getOffset()); if (Indirect && !Location.isReg()) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } } // Now attach the location information to the DIE. - addBlock(Die, Attribute, Block); + addBlock(Die, Attribute, Loc); } /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. +/// given the extra address information encoded in the DbgVariable, starting +/// from the starting location. Add the DWARF information to the die. 
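The new addRegisterOp logic above names the super-register and then narrows it with DW_OP_bit_piece (size and offset) or DW_OP_piece (whole bytes) when only a sub-register holds the value. A standalone sketch of that piece selection, with made-up register metadata in place of TargetRegisterInfo queries:

#include <cstdint>
#include <cstdio>
#include <vector>

// Real DWARF opcodes used below.
enum : uint8_t {
  DW_OP_reg0      = 0x50,
  DW_OP_regx      = 0x90,
  DW_OP_piece     = 0x93,
  DW_OP_bit_piece = 0x9d
};

// A register location; for a sub-register, which bits of the super-register
// it occupies. These fields are illustrative inputs only.
struct RegLoc {
  unsigned DwarfReg;     // dwarf number of the (super-)register
  bool IsSubRegister;
  unsigned SizeInBits;   // sub-register size
  unsigned OffsetInBits; // sub-register offset within the super-register
};

static std::vector<uint64_t> buildRegisterOp(const RegLoc &L) {
  std::vector<uint64_t> Ops;
  // Registers 0-31 have compact one-byte opcodes; others use DW_OP_regx + ULEB.
  if (L.DwarfReg < 32)
    Ops.push_back(DW_OP_reg0 + L.DwarfReg);
  else {
    Ops.push_back(DW_OP_regx);
    Ops.push_back(L.DwarfReg);
  }
  if (L.IsSubRegister) {
    if (L.OffsetInBits > 0) {
      // Not at offset zero: describe the exact bit range.
      Ops.push_back(DW_OP_bit_piece);
      Ops.push_back(L.SizeInBits);
      Ops.push_back(L.OffsetInBits);
    } else {
      // Offset zero: a whole-byte piece is enough (assuming 8 bits per byte).
      Ops.push_back(DW_OP_piece);
      Ops.push_back(L.SizeInBits / 8);
    }
  }
  return Ops;
}

int main() {
  // E.g. a 32-bit sub-register at offset 0 of a register with dwarf number 40.
  for (uint64_t Op : buildRegisterOp({40, true, 32, 0}))
    std::printf("0x%llx ", (unsigned long long)Op);
  std::printf("\n"); // 0x90 0x28 0x93 0x4
}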
/// -void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); +void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); + addRegisterOffset(Loc, Location.getReg(), DV.getAddrElement(1)); i = 2; } else - addRegisterOp(Block, Location.getReg()); + addRegisterOp(Loc, Location.getReg()); } else - addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + addRegisterOffset(Loc, Location.getReg(), Location.getOffset()); for (; i < N; ++i) { uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } else llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, Block); + addBlock(Die, Attribute, Loc); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -581,9 +663,9 @@ void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { +void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { DIType Ty = DV.getType(); DIType TmpTy = Ty; uint16_t Tag = Ty.getTag(); @@ -620,40 +702,40 @@ void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, // Decode the original location, and use that as the start of the byref // variable's location. - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg()) - addRegisterOp(Block, Location.getReg()); + addRegisterOp(Loc, Location.getReg()); else - addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + addRegisterOffset(Loc, Location.getReg(), Location.getOffset()); // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. 
if (forwardingFieldOffset > 0) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Loc, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, Block); + addBlock(Die, Attribute, Loc); } /// isTypeSigned - Return true if the type is signed. @@ -698,8 +780,9 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); - // If this type is not derived from any type then take conservative approach. - if (!BaseType.isValid()) + // If this type is not derived from any type or the type is a declaration then + // take conservative approach. + if (!BaseType.isValid() || BaseType.isForwardDecl()) return Ty.getSizeInBits(); // If this is a derived type, go ahead and get the base type, unless it's a @@ -716,8 +799,8 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { } /// addConstantValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, - DIType Ty) { +void DwarfUnit::addConstantValue(DIE *Die, const MachineOperand &MO, + DIType Ty) { // FIXME: This is a bit conservative/simple - it emits negative values at // their maximum bit width which is a bit unfortunate (& doesn't prefer // udata/sdata over dataN as suggested by the DWARF spec) @@ -755,7 +838,7 @@ void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, } /// addConstantFPValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { +void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -778,19 +861,19 @@ void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { } /// addConstantFPValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { +void DwarfUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { // Pass this down to addConstantValue as an unsigned bag of bits. addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. -void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, - bool Unsigned) { +void DwarfUnit::addConstantValue(DIE *Die, const ConstantInt *CI, + bool Unsigned) { addConstantValue(Die, CI->getValue(), Unsigned); } // addConstantValue - Add constant value entry in variable DIE. 
-void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { +void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { // If we're a signed constant definitely use sdata. @@ -846,7 +929,7 @@ void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { } /// addTemplateParams - Add template parameters into buffer. -void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { +void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { DIDescriptor Element = TParams.getElement(i); @@ -860,9 +943,9 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { } /// getOrCreateContextDIE - Get context owner's DIE. -DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { +DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { if (!Context || Context.isFile()) - return getCUDie(); + return getUnitDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); if (Context.isNameSpace()) @@ -872,18 +955,38 @@ DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { return getDIE(Context); } +DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { + DIScope Context = resolve(Ty.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(Context); + + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + + constructTypeDIE(*TyDIE, Ty); + + updateAcceleratorTables(Context, Ty, TyDIE); + return TyDIE; +} + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. -DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { +DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) return NULL; DIType Ty(TyNode); assert(Ty.isType()); + assert(Ty == resolve(Ty.getRef()) && + "type was not uniqued, possible ODR violation."); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext())); + DIScope Context = resolve(Ty.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); DIE *TyDIE = getDIE(Ty); @@ -893,16 +996,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { // Create new type. TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + updateAcceleratorTables(Context, Ty, TyDIE); + if (Ty.isBasicType()) constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { + else if (Ty.isCompositeType()) { + DICompositeType CTy(Ty); + if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) + if (MDString *TypeId = CTy.getIdentifier()) { + DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); + // Skip updating the accelerator tables since this is not the full type. + return TyDIE; + } + constructTypeDIE(*TyDIE, CTy); + } else { assert(Ty.isDerivedType() && "Unknown kind of DIType"); constructTypeDIE(*TyDIE, DIDerivedType(Ty)); } - // If this is a named finished type then include it in the list of types - // for the accelerator tables. 
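For DW_AT_const_value the code above distinguishes signed constants (SLEB128 via DW_FORM_sdata) from unsigned ones stored in fixed-size data forms. A rough illustration of one such form-selection policy, not the exact rule used in the function above:

#include <cstdint>
#include <cstdio>

// Real DWARF form codes.
enum DwarfForm : unsigned {
  DW_FORM_data1 = 0x0b,
  DW_FORM_data2 = 0x05,
  DW_FORM_data4 = 0x06,
  DW_FORM_data8 = 0x07,
  DW_FORM_sdata = 0x0d
};

// Signed constants go out as SLEB128; unsigned ones in the smallest fixed form.
static DwarfForm constantForm(uint64_t Value, bool IsSigned) {
  if (IsSigned)
    return DW_FORM_sdata;
  if (Value <= 0xffu)       return DW_FORM_data1;
  if (Value <= 0xffffu)     return DW_FORM_data2;
  if (Value <= 0xffffffffu) return DW_FORM_data4;
  return DW_FORM_data8;
}

int main() {
  std::printf("0x%02x 0x%02x 0x%02x\n",
              constantForm(200, false),   // data1
              constantForm(70000, false), // data4
              constantForm(5, true));     // sdata
}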
+ + return TyDIE; +} + +void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, + const DIE *TyDIE) { if (!Ty.getName().empty() && !Ty.isForwardDecl()) { bool IsImplementation = 0; if (Ty.isCompositeType()) { @@ -913,13 +1029,16 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { } unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); - } - return TyDIE; + if ((!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace()) && + getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly) + GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = TyDIE; + } } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { +void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. @@ -937,66 +1056,59 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); addDIEEntry(Entity, Attribute, Entry); - - // If this is a complete composite type then include it in the - // list of global types. - addGlobalType(Ty); } // Accelerator table mutators - add each name along with its companion // DIE to the proper table while ensuring that the name that we're going // to reference is in the string table. We do this since the names we // add may not only be identical to the names in the DIE. -void CompileUnit::addAccelName(StringRef Name, DIE *Die) { +void DwarfUnit::addAccelName(StringRef Name, const DIE *Die) { + if (!DD->useDwarfAccelTables()) + return; DU->getStringPoolEntry(Name); - std::vector<DIE *> &DIEs = AccelNames[Name]; + std::vector<const DIE *> &DIEs = AccelNames[Name]; DIEs.push_back(Die); } -void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) { +void DwarfUnit::addAccelObjC(StringRef Name, const DIE *Die) { + if (!DD->useDwarfAccelTables()) + return; DU->getStringPoolEntry(Name); - std::vector<DIE *> &DIEs = AccelObjC[Name]; + std::vector<const DIE *> &DIEs = AccelObjC[Name]; DIEs.push_back(Die); } -void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) { +void DwarfUnit::addAccelNamespace(StringRef Name, const DIE *Die) { + if (!DD->useDwarfAccelTables()) + return; DU->getStringPoolEntry(Name); - std::vector<DIE *> &DIEs = AccelNamespace[Name]; + std::vector<const DIE *> &DIEs = AccelNamespace[Name]; DIEs.push_back(Die); } -void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { +void DwarfUnit::addAccelType(StringRef Name, + std::pair<const DIE *, unsigned> Die) { + if (!DD->useDwarfAccelTables()) + return; DU->getStringPoolEntry(Name); - std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; + std::vector<std::pair<const DIE *, unsigned> > &DIEs = AccelTypes[Name]; DIEs.push_back(Die); } /// addGlobalName - Add a new global name to the compile unit. -void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { +void DwarfUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { + if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly) + return; std::string FullName = getParentContextString(Context) + Name.str(); GlobalNames[FullName] = Die; } -/// addGlobalType - Add a new global type to the compile unit. 
-/// -void CompileUnit::addGlobalType(DIType Ty) { - DIScope Context = resolve(Ty.getContext()); - if (!Ty.getName().empty() && !Ty.isForwardDecl() && - (!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace())) - if (DIEEntry *Entry = getDIEEntry(Ty)) { - std::string FullName = - getParentContextString(Context) + Ty.getName().str(); - GlobalTypes[FullName] = Entry->getEntry(); - } -} - /// getParentContextString - Walks the metadata parent chain in a language /// specific manner (using the compile unit language) and returns /// it as a string. This is done at the metadata level because DIEs may /// not currently have been added to the parent context and walking the /// DIEs looking for names is more expensive than walking the metadata. -std::string CompileUnit::getParentContextString(DIScope Context) const { +std::string DwarfUnit::getParentContextString(DIScope Context) const { if (!Context) return ""; @@ -1031,24 +1143,8 @@ std::string CompileUnit::getParentContextString(DIScope Context) const { return CS; } -/// addPubTypes - Add subprogram argument types for pubtypes section. -void CompileUnit::addPubTypes(DISubprogram SP) { - DICompositeType SPTy = SP.getType(); - uint16_t SPTag = SPTy.getTag(); - if (SPTag != dwarf::DW_TAG_subroutine_type) - return; - - DIArray Args = SPTy.getTypeArray(); - for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { - DIType ATy(Args.getElement(i)); - if (!ATy.isType()) - continue; - addGlobalType(ATy); - } -} - /// constructTypeDIE - Construct basic type die from DIBasicType. -void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. @@ -1067,7 +1163,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { } /// constructTypeDIE - Construct derived type die from DIDerivedType. -void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { +void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; @@ -1094,40 +1190,25 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addSourceLine(&Buffer, DTy); } -/// Return true if the type is appropriately scoped to be contained inside -/// its own type unit. -static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) { - DIScope Parent = DD->resolve(Ty.getContext()); - while (Parent) { - // Don't generate a hash for anything scoped inside a function. - if (Parent.isSubprogram()) - return false; - Parent = DD->resolve(Parent.getContext()); - } - return true; -} - -/// Return true if the type should be split out into a type unit. -static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) { - uint16_t Tag = CTy.getTag(); - - switch (Tag) { - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_enumeration_type: - case dwarf::DW_TAG_class_type: - // If this is a class, structure, union, or enumeration type - // that is a definition (not a declaration), and not scoped - // inside a function then separate this out as a type unit. - return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD); - default: - return false; +/// constructSubprogramArguments - Construct function argument DIEs. 
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) { + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Args.getElement(i); + if (Ty.isUnspecifiedParameter()) { + assert(i == N-1 && "Unspecified parameter must be the last argument"); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); + } else { + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); + addType(Arg, DIType(Ty)); + if (DIType(Ty).isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + } } } /// constructTypeDIE - Construct type DIE from DICompositeType. -void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. +void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Add name if not anonymous or intermediate type. StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; @@ -1148,19 +1229,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addType(&Buffer, RTy); bool isPrototyped = true; - // Add arguments. - for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Elements.getElement(i); - if (Ty.isUnspecifiedParameter()) { - createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); - isPrototyped = false; - } else { - DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); - addType(Arg, DIType(Ty)); - if (DIType(Ty).isArtificial()) - addFlag(Arg, dwarf::DW_AT_artificial); - } - } + if (Elements.getNumElements() == 2 && + Elements.getElement(1).isUnspecifiedParameter()) + isPrototyped = false; + + constructSubprogramArguments(Buffer, Elements); + // Add prototype flag if we're dealing with a C language and the // function has been prototyped. uint16_t Language = getLanguage(); @@ -1168,6 +1242,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(&Buffer, dwarf::DW_AT_prototyped); + + if (CTy.isLValueReference()) + addFlag(&Buffer, dwarf::DW_AT_reference); + + if (CTy.isRValueReference()) + addFlag(&Buffer, dwarf::DW_AT_rvalue_reference); } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: @@ -1177,21 +1257,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = getOrCreateSubprogramDIE(SP); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addFlag(ElemDie, dwarf::DW_AT_explicit); - } else if (Element.isDerivedType()) { + if (Element.isSubprogram()) + ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); @@ -1207,7 +1275,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, 
dwarf::DW_AT_APPLE_property_name, PropertyName); - addType(ElemDie, Property.getType()); + if (Property.getType()) + addType(ElemDie, Property.getType()); addSourceLine(ElemDie, Property); StringRef GetterName = Property.getObjCPropertyGetterName(); if (!GetterName.empty()) @@ -1293,17 +1362,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); } - // If this is a type applicable to a type unit it then add it to the - // list of types we'll compute a hash for later. - if (shouldCreateTypeUnit(CTy, DD)) - DD->addTypeUnitType(&Buffer); } /// constructTemplateTypeParameterDIE - Construct new DIE for the given /// DITemplateTypeParameter. -void -CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, - DITemplateTypeParameter TP) { +void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP) { DIE *ParamDIE = createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. @@ -1316,8 +1380,8 @@ CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, /// constructTemplateValueParameterDIE - Construct new DIE for the given /// DITemplateValueParameter. void -CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, - DITemplateValueParameter VP) { +DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter VP) { DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer); // Add the type if there is one, template template and template parameter @@ -1333,12 +1397,12 @@ CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->getSymbol(GV)); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + addOpAddress(Loc, Asm->getSymbol(GV)); // Emit DW_OP_stack_value to use the address as the immediate value of the // parameter, rather than a pointer to it. - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); - addBlock(ParamDIE, dwarf::DW_AT_location, Block); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Loc); } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, @@ -1352,7 +1416,7 @@ CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, } /// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { +DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); @@ -1373,11 +1437,16 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { } /// getOrCreateSubprogramDIE - Create new DIE using SP. -DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). 
- DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIScope Context = resolve(SP.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(Context); + + // Unique declarations based on the ODR, where applicable. + SP = DISubprogram(DD->resolve(SP.getRef())); + assert(SP.Verify()); DIE *SPDie = getDIE(SP); if (SPDie) @@ -1386,7 +1455,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { DISubprogram SPDecl = SP.getFunctionDeclaration(); if (SPDecl.isSubprogram()) // Add subprogram definitions to the CU die directly. - ContextDIE = CUDie.get(); + ContextDIE = UnitDie.get(); // DW_TAG_inlined_subroutine may refer to this DIE. SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); @@ -1440,7 +1509,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); - DIEBlock *Block = getDIEBlock(); + DIELoc *Block = getDIELoc(); addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); @@ -1453,13 +1522,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); - DIType ATy(Args.getElement(i)); - addType(Arg, ATy); - if (ATy.isArtificial()) - addFlag(Arg, dwarf::DW_AT_artificial); - } + constructSubprogramArguments(*SPDie, Args); } if (SP.isArtificial()) @@ -1475,6 +1538,25 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } + if (SP.isLValueReference()) + addFlag(SPDie, dwarf::DW_AT_reference); + + if (SP.isRValueReference()) + addFlag(SPDie, dwarf::DW_AT_rvalue_reference); + + if (SP.isProtected()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (SP.isExplicit()) + addFlag(SPDie, dwarf::DW_AT_explicit); + return SPDie; } @@ -1506,17 +1588,15 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// createGlobalVariableDIE - create global variable DIE. -void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { - +void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // Check for pre-existence. if (getDIE(GV)) return; - if (!GV.isGlobalVariable()) - return; + assert(GV.isGlobalVariable()); DIScope GVContext = GV.getContext(); - DIType GTy = GV.getType(); + DIType GTy = DD->resolve(GV.getType()); // If this is a static data member definition, some attributes belong // to the declaration DIE. @@ -1558,44 +1638,46 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { bool isGlobalVariable = GV.getGlobal() != NULL; if (isGlobalVariable) { addToAccelTable = true; - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); if (GV.getGlobal()->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. 
unsigned PointerSize = Asm->getDataLayout().getPointerSize(); assert((PointerSize == 4 || PointerSize == 8) && "Add support for other sizes if necessary"); - const MCExpr *Expr = - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { // 1) Start with a constNu of the appropriate pointer size - addUInt(Block, dwarf::DW_FORM_data1, + addUInt(Loc, dwarf::DW_FORM_data1, PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(Block, dwarf::DW_FORM_udata, Expr); + addExpr(Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Loc, dwarf::DW_FORM_udata, + DU->getAddrPoolIndex(Sym, /* TLS */ true)); } // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); - } else - addOpAddress(Block, Sym); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(Loc, Sym); + } // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie); + VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *UnitDie); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, Loc); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) addFlag(VariableDIE, dwarf::DW_AT_declaration); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, Block); + addBlock(VariableDIE, dwarf::DW_AT_location, Loc); } // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); @@ -1605,7 +1687,8 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // TAG_variable. addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE : VariableDIE, - dwarf::DW_AT_MIPS_linkage_name, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) { @@ -1617,15 +1700,17 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { addToAccelTable = true; // GV is a merged global. 
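The thread-local case above follows GCC's scheme: push the (relocated) offset of the variable inside the module's TLS block as a pointer-sized constant, then let the debugger resolve it with DW_OP_GNU_push_tls_address. A small sketch of the non-split-DWARF sequence, with the relocated symbol stood in by a plain integer offset:

#include <cstdint>
#include <cstdio>
#include <vector>

// Real DWARF opcodes for the non-split-DWARF TLS sequence.
enum : uint8_t {
  DW_OP_const4u = 0x0c,
  DW_OP_const8u = 0x0e,
  DW_OP_GNU_push_tls_address = 0xe0 // GNU extension understood by GDB
};

// Build the location expression for a TLS variable: a pointer-sized constant
// holding the offset within the module's TLS block, then the TLS lookup op.
// (In the real emitter the constant is a relocated symbol, not a raw number;
// the split-DWARF path uses an address-pool index instead.)
static std::vector<uint64_t> tlsLocation(unsigned PointerSize, uint64_t TlsOffset) {
  std::vector<uint64_t> Ops;
  Ops.push_back(PointerSize == 4 ? DW_OP_const4u : DW_OP_const8u);
  Ops.push_back(TlsOffset);
  Ops.push_back(DW_OP_GNU_push_tls_address);
  return Ops;
}

int main() {
  for (uint64_t Op : tlsLocation(8, 0x10))
    std::printf("0x%llx ", (unsigned long long)Op);
  std::printf("\n"); // 0xe 0x10 0xe0
}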
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); Value *Ptr = CE->getOperand(0); - addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr))); - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr)); + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(Loc, Sym); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); - addUInt(Block, dwarf::DW_FORM_udata, + addUInt(Loc, dwarf::DW_FORM_udata, Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, Block); + addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, Loc); } if (addToAccelTable) { @@ -1644,8 +1729,7 @@ void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, - DIE *IndexTy) { +void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy); @@ -1670,7 +1754,7 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { +void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { if (CTy.isVector()) addFlag(&Buffer, dwarf::DW_AT_GNU_vector); @@ -1682,12 +1766,12 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { // as different languages may have different sizes for indexes. DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { - // Construct an anonymous type for index type. - IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get()); - addString(IdxTy, dwarf::DW_AT_name, "int"); - addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t)); + // Construct an integer type to use for indexes. + IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *UnitDie); + addString(IdxTy, dwarf::DW_AT_name, "sizetype"); + addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); + dwarf::DW_ATE_unsigned); setIndexTyDie(IdxTy); } @@ -1701,7 +1785,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { } /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. -void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { +void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { DIArray Elements = CTy.getTypeArray(); // Add enumerators to enumeration type. @@ -1712,7 +1796,8 @@ void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { StringRef Name = Enum.getName(); addString(Enumerator, dwarf::DW_AT_name, Name); int64_t Value = Enum.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Value); } } DIType DTy = resolve(CTy.getTypeDerivedFrom()); @@ -1724,7 +1809,7 @@ void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { /// constructContainingTypeDIEs - Construct DIEs for types that contain /// vtables. 
-void CompileUnit::constructContainingTypeDIEs() { +void DwarfUnit::constructContainingTypeDIEs() { for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { @@ -1740,7 +1825,7 @@ void CompileUnit::constructContainingTypeDIEs() { } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { +DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { StringRef Name = DV.getName(); // Define variable debug information entry. @@ -1768,10 +1853,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, - DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); + addLocationList(VariableDie, dwarf::DW_AT_location, Offset); DV.setDIE(VariableDie); return VariableDie; } @@ -1815,7 +1897,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { } /// constructMemberDIE - Construct member DIE from DIDerivedType. -void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { +void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) @@ -1825,16 +1907,13 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addSourceLine(MemberDie, DT); - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. // BaseAddr = ObAddr + *((*ObAddr) - Offset) - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); + DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc(); addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); @@ -1850,10 +1929,9 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { uint64_t OffsetInBytes; if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, None, - getBaseTypeSize(DD, DT) >> 3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits()); + // Handle bitfield, assume bytes are 8 bits. + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); uint64_t Offset = DT.getOffsetInBits(); uint64_t AlignMask = ~(DT.getAlignInBits() - 1); @@ -1866,13 +1944,21 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { Offset = FieldSize - (Offset + Size); addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); - // Here WD_AT_data_member_location points to the anonymous + // Here DW_AT_data_member_location points to the anonymous // field that includes this bit field. OffsetInBytes = FieldOffset >> 3; } else // This is not a bitfield. 
OffsetInBytes = DT.getOffsetInBits() >> 3; - addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, OffsetInBytes); + + if (DD->getDwarfVersion() <= 2) { + DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); + addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); } if (DT.isProtected()) @@ -1900,7 +1986,7 @@ void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { } /// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. -DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { +DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) return NULL; @@ -1944,13 +2030,89 @@ DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { return StaticMemberDIE; } -void CompileUnit::emitHeader(const MCSection *ASection, - const MCSymbol *ASectionSym) { +void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), - ASectionSym); + // We share one abbreviations table across all units so it's always at the + // start of the section. Use a relocatable offset where needed to ensure + // linking doesn't invalidate that offset. + if (ASectionSym) + Asm->EmitSectionOffset(ASectionSym, ASectionSym); + else + // Use a constant value when no symbol is provided. + Asm->EmitInt32(0); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } + +void DwarfUnit::addRange(RangeSpan Range) { + // Only add a range for this unit if we're emitting full debug. + if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) { + // If we have no current ranges just add the range and return, otherwise, + // check the current section and CU against the previous section and CU we + // emitted into and the subprogram was contained within. If these are the + // same then extend our current range, otherwise add this as a new range. + if (CURanges.size() == 0 || + this != DD->getPrevCU() || + Asm->getCurrentSection() != DD->getPrevSection()) { + CURanges.push_back(Range); + return; + } + + assert(&(CURanges.back().getEnd()->getSection()) == + &(Range.getEnd()->getSection()) && + "We can only append to a range in the same section!"); + CURanges.back().setEnd(Range.getEnd()); + } +} + +void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { + // Define start line table label for each Compile Unit. + MCSymbol *LineTableStartSym = + Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); + + stmtListIndex = UnitDie->getValues().size(); + + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. 
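// DWARF 2 only accepts a location expression for DW_AT_data_member_location,
// so the member-location change above keeps the DW_OP_plus_uconst block when
// getDwarfVersion() <= 2 and writes a bare constant byte offset for DWARF 3
// and later. A standalone sketch of that choice (the return format is
// illustrative, not the LLVM emitter):
#include <cstdint>
#include <string>

std::string memberLocation(unsigned DwarfVersion, uint64_t OffsetInBytes) {
  if (DwarfVersion <= 2)
    // Expression form: add the member offset to the struct's base address.
    return "block: DW_OP_plus_uconst " + std::to_string(OffsetInBytes);
  // Constant form: the byte offset is stored directly as attribute data.
  return "constant: " + std::to_string(OffsetInBytes);
}
// Worked bitfield example for the path above, assuming the (elided) alignment
// math leaves the member at bit 5 of a 32-bit container on a little-endian
// target, with FieldSize = 32 and Size = 3:
//   DW_AT_byte_size  = FieldSize / 8          = 4
//   DW_AT_bit_size   = Size                   = 3
//   DW_AT_bit_offset = FieldSize - (5 + Size) = 24
// with DW_AT_data_member_location pointing at the containing field
// (FieldOffset >> 3 bytes from the start of the struct).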
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + addSectionLabel(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym); + else + addSectionDelta(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym, + DwarfLineSectionSym); +} + +void DwarfCompileUnit::applyStmtList(DIE &D) { + D.addValue(dwarf::DW_AT_stmt_list, + UnitDie->getAbbrev().getData()[stmtListIndex].getForm(), + UnitDie->getValues()[stmtListIndex]); +} + +void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { + DwarfUnit::emitHeader(ASectionSym); + Asm->OutStreamer.AddComment("Type Signature"); + Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer.AddComment("Type DIE Offset"); + // In a skeleton type unit there is no type DIE so emit a zero offset. + Asm->OutStreamer.EmitIntValue(Ty ? Ty->getOffset() : 0, + sizeof(Ty->getOffset())); +} + +void DwarfTypeUnit::initSection(const MCSection *Section) { + assert(!this->Section); + this->Section = Section; + // Since each type unit is contained in its own COMDAT section, the begin + // label and the section label are the same. Using the begin label emission in + // DwarfDebug to emit the section label as well is slightly subtle/sneaky, but + // the only other alternative of lazily constructing start-of-section labels + // and storing a mapping in DwarfDebug (or AsmPrinter). + this->SectionSym = this->LabelBegin = + Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + this->LabelEnd = + Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); + this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID()); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index d782c88..ef713f7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -18,10 +18,13 @@ #include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" -#include "llvm/DebugInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCDwarf.h" namespace llvm { @@ -30,30 +33,59 @@ class MachineOperand; class ConstantInt; class ConstantFP; class DbgVariable; +class DwarfCompileUnit; + +// Data structure to hold a range for range lists. +class RangeSpan { +public: + RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} + const MCSymbol *getStart() const { return Start; } + const MCSymbol *getEnd() const { return End; } + void setEnd(const MCSymbol *E) { End = E; } + +private: + const MCSymbol *Start, *End; +}; + +class RangeSpanList { +private: + // Index for locating within the debug_range section this particular span. + MCSymbol *RangeSym; + // List of ranges. 
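// emitHeader() above writes, after the unit's initial length: a 2-byte DWARF
// version, a 4-byte offset into the abbreviation section (a section offset
// when a symbol is provided, otherwise the constant 0), and a 1-byte address
// size; DwarfTypeUnit::emitHeader appends an 8-byte type signature and a
// 4-byte type DIE offset. A small sanity check of the resulting header sizes,
// mirroring getHeaderSize() further down in the header file:
#include <cstdint>

constexpr unsigned compileUnitHeaderSize() {
  return sizeof(int16_t)   // DWARF version number
         + sizeof(int32_t) // offset into the abbreviation section
         + sizeof(int8_t); // address size in bytes
}

constexpr unsigned typeUnitHeaderSize() {
  return compileUnitHeaderSize() + sizeof(uint64_t)  // type signature
                                 + sizeof(uint32_t); // type DIE offset
}

static_assert(compileUnitHeaderSize() == 7, "CU header is 7 bytes");
static_assert(typeUnitHeaderSize() == 19, "type unit header is 19 bytes");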
+ SmallVector<RangeSpan, 2> Ranges; + +public: + RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {} + MCSymbol *getSym() const { return RangeSym; } + const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } + void addRange(RangeSpan Range) { Ranges.push_back(Range); } +}; //===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associated +/// Unit - This dwarf writer support class manages information associated /// with a source file. -class CompileUnit { +class DwarfUnit { +protected: /// UniqueID - a numeric ID unique among all CUs in the module - /// unsigned UniqueID; /// Node - MDNode for the compile unit. - DICompileUnit Node; + DICompileUnit CUNode; - /// CUDie - Compile unit debug information entry. - /// - const OwningPtr<DIE> CUDie; + /// Unit debug information entry. + const std::unique_ptr<DIE> UnitDie; + + /// Offset of the UnitDie from beginning of debug info section. + unsigned DebugInfoOffset; /// Asm - Target of Dwarf emission. AsmPrinter *Asm; // Holders for some common dwarf information. DwarfDebug *DD; - DwarfUnits *DU; + DwarfFile *DU; - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. + /// IndexTyDie - An anonymous type for index type. Owned by UnitDie. DIE *IndexTyDie; /// MDNodeToDieMap - Tracks the mapping of unit level debug information @@ -65,57 +97,150 @@ class CompileUnit { DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; /// GlobalNames - A map of globally visible named entities for this unit. - /// - StringMap<DIE *> GlobalNames; + StringMap<const DIE *> GlobalNames; /// GlobalTypes - A map of globally visible types for this unit. - /// - StringMap<DIE *> GlobalTypes; + StringMap<const DIE *> GlobalTypes; /// AccelNames - A map of names for the name accelerator table. - /// - StringMap<std::vector<DIE *> > AccelNames; - StringMap<std::vector<DIE *> > AccelObjC; - StringMap<std::vector<DIE *> > AccelNamespace; - StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes; + StringMap<std::vector<const DIE *> > AccelNames; + + /// AccelObjC - A map of objc spec for the objc accelerator table. + StringMap<std::vector<const DIE *> > AccelObjC; + + /// AccelNamespace - A map of names for the namespace accelerator table. + StringMap<std::vector<const DIE *> > AccelNamespace; + + /// AccelTypes - A map of names for the type accelerator table. + StringMap<std::vector<std::pair<const DIE *, unsigned> > > AccelTypes; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; + + /// DIELocs - A list of all the DIELocs in use. + std::vector<DIELoc *> DIELocs; /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that /// need DW_AT_containing_type attribute. This attribute points to a DIE that /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; + // List of ranges for a given compile unit. + SmallVector<RangeSpan, 1> CURanges; + + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. + SmallVector<RangeSpanList, 1> CURangeLists; + // DIEValueAllocator - All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. DIEInteger *DIEIntegerOne; + /// The section this unit will be emitted in. 
+ const MCSection *Section; + + /// A label at the start of the non-dwo section related to this unit. + MCSymbol *SectionSym; + + /// The start of the unit within its section. + MCSymbol *LabelBegin; + + /// The end of the unit within its section. + MCSymbol *LabelEnd; + + /// The label for the start of the range sets for the elements of this unit. + MCSymbol *LabelRange; + + /// Skeleton unit associated with this unit. + DwarfUnit *Skeleton; + + DwarfUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + public: - CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU); - ~CompileUnit(); + virtual ~DwarfUnit(); + + /// Set the skeleton unit associated with this unit. + void setSkeleton(DwarfUnit *Skel) { Skeleton = Skel; } + + /// Get the skeleton unit associated with this unit. + DwarfUnit *getSkeleton() const { return Skeleton; } + + /// Pass in the SectionSym even though we could recreate it in every compile + /// unit (type units will have actually distinct symbols once they're in + /// comdat sections). + void initSection(const MCSection *Section, MCSymbol *SectionSym) { + assert(!this->Section); + this->Section = Section; + this->SectionSym = SectionSym; + this->LabelBegin = + Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + this->LabelEnd = + Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); + this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID()); + } + + const MCSection *getSection() const { + assert(Section); + return Section; + } + + /// If there's a skeleton then return the section symbol for the skeleton + /// unit, otherwise return the section symbol for this unit. + MCSymbol *getLocalSectionSym() const { + if (Skeleton) + return Skeleton->getSectionSym(); + return getSectionSym(); + } + + MCSymbol *getSectionSym() const { + assert(Section); + return SectionSym; + } + + /// If there's a skeleton then return the begin label for the skeleton unit, + /// otherwise return the local label for this unit. + MCSymbol *getLocalLabelBegin() const { + if (Skeleton) + return Skeleton->getLabelBegin(); + return getLabelBegin(); + } + + MCSymbol *getLabelBegin() const { + assert(Section); + return LabelBegin; + } + + MCSymbol *getLabelEnd() const { + assert(Section); + return LabelEnd; + } + + MCSymbol *getLabelRange() const { + assert(Section); + return LabelRange; + } // Accessors. 
unsigned getUniqueID() const { return UniqueID; } - uint16_t getLanguage() const { return Node.getLanguage(); } - DICompileUnit getNode() const { return Node; } - DIE *getCUDie() const { return CUDie.get(); } - const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; } - const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; } + uint16_t getLanguage() const { return CUNode.getLanguage(); } + DICompileUnit getCUNode() const { return CUNode; } + DIE *getUnitDie() const { return UnitDie.get(); } + const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } - const StringMap<std::vector<DIE *> > &getAccelNames() const { + const StringMap<std::vector<const DIE *> > &getAccelNames() const { return AccelNames; } - const StringMap<std::vector<DIE *> > &getAccelObjC() const { + const StringMap<std::vector<const DIE *> > &getAccelObjC() const { return AccelObjC; } - const StringMap<std::vector<DIE *> > &getAccelNamespace() const { + const StringMap<std::vector<const DIE *> > &getAccelNamespace() const { return AccelNamespace; } - const StringMap<std::vector<std::pair<DIE *, unsigned> > > & + const StringMap<std::vector<std::pair<const DIE *, unsigned> > > & getAccelTypes() const { return AccelTypes; } @@ -124,8 +249,23 @@ public: void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } + bool hasContent() const { return !UnitDie->getChildren().empty(); } + + /// addRange - Add an address range to the list of ranges for this unit. + void addRange(RangeSpan Range); + + /// getRanges - Get the list of ranges for this unit. + const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } + SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; } + + /// addRangeList - Add an address range list to the list of range lists. + void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); } + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl<RangeSpanList> &getRangeLists() const { + return CURangeLists; + } + SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; } /// getParentContextString - Get a string containing the language specific /// context for a global name. @@ -135,24 +275,17 @@ public: /// void addGlobalName(StringRef Name, DIE *Die, DIScope Context); - /// addGlobalType - Add a new global type to the compile unit. - /// - void addGlobalType(DIType Ty); - - /// addPubTypes - Add a set of types from the subprogram to the global types. - void addPubTypes(DISubprogram SP); - /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, DIE *Die); + void addAccelName(StringRef Name, const DIE *Die); /// addAccelObjC - Add a new name to the ObjC accelerator table. - void addAccelObjC(StringRef Name, DIE *Die); + void addAccelObjC(StringRef Name, const DIE *Die); /// addAccelNamespace - Add a new name to the namespace accelerator table. - void addAccelNamespace(StringRef Name, DIE *Die); + void addAccelNamespace(StringRef Name, const DIE *Die); /// addAccelType - Add a new type to the type accelerator table. 
- void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die); + void addAccelType(StringRef Name, std::pair<const DIE *, unsigned> Die); /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. We delegate the request to DwarfDebug @@ -161,7 +294,8 @@ public: /// kept in DwarfDebug. DIE *getDIE(DIDescriptor D) const; - DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } + /// getDIELoc - Returns a fresh newly allocated DIELoc. + DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); } /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug /// when the MDNode can be part of the type system, since DIEs for @@ -171,73 +305,81 @@ public: /// addDie - Adds or interns the DIE to the compile unit. /// - void addDie(DIE *Buffer) { CUDie->addChild(Buffer); } + void addDie(DIE *Buffer) { UnitDie->addChild(Buffer); } /// addFlag - Add a flag that is true to the DIE. void addFlag(DIE *Die, dwarf::Attribute Attribute); /// addUInt - Add an unsigned integer attribute data and value. - /// void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer); - void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer); + void addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer); /// addSInt - Add an signed integer attribute data and value. - /// void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer); - void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer); + void addSInt(DIELoc *Die, Optional<dwarf::Form> Form, int64_t Integer); /// addString - Add a string attribute data and value. - /// void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); /// addLocalString - Add a string attribute data and value. - /// - void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); + void addLocalString(DIE *Die, dwarf::Attribute Attribute, + const StringRef Str); /// addExpr - Add a Dwarf expression attribute data and value. - /// - void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr); + void addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr); /// addLabel - Add a Dwarf label attribute data and value. - /// void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); - void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label); + void addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label); - /// addLabelAddress - Add a dwarf label attribute data and value using - /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// addLocationList - Add a Dwarf loclistptr attribute data and value. + void addLocationList(DIE *Die, dwarf::Attribute Attribute, unsigned Index); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. /// - void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); + void addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addSectionOffset - Add an offset into a section attribute data and value. + /// + void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer); /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. - /// - void addOpAddress(DIEBlock *Die, const MCSymbol *Label); + void addOpAddress(DIELoc *Die, const MCSymbol *Label); - /// addDelta - Add a label delta attribute data and value. 
- /// - void addDelta(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Hi, - const MCSymbol *Lo); + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// addLabelDelta - Add a label delta attribute data and value. + void addLabelDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); /// addDIEEntry - Add a DIE attribute data and value. - /// void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); /// addDIEEntry - Add a DIE attribute data and value. - /// void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); + void addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type); + + /// addBlock - Add block data. + void addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Block); + /// addBlock - Add block data. - /// void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); /// addSourceLine - Add location information to specified debug information /// entry. + void addSourceLine(DIE *Die, unsigned Line, StringRef File, + StringRef Directory); void addSourceLine(DIE *Die, DIVariable V); void addSourceLine(DIE *Die, DIGlobalVariable G); void addSourceLine(DIE *Die, DISubprogram SP); @@ -247,8 +389,8 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. - void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, - bool Indirect = false); + void addAddress(DIE *Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); @@ -263,17 +405,17 @@ public: void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIEBlock *TheDie, unsigned Reg); + void addRegisterOp(DIELoc *TheDie, unsigned Reg); /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset); + void addRegisterOffset(DIELoc *TheDie, unsigned Reg, int64_t Offset); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -282,8 +424,8 @@ public: /// actual Block variable (navigating the Block struct) based on the /// starting location. Add the DWARF information to the die. Obsolete, /// please use addComplexAddress instead. - /// - void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a @@ -294,7 +436,8 @@ public: /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. 
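// addRegisterOp/addRegisterOffset (declared above) build register-relative
// location expressions. Their bodies are not part of this hunk; the sketch
// below only illustrates the standard DWARF encoding such helpers target:
// registers 0-31 use the compact DW_OP_reg0+n / DW_OP_breg0+n opcodes, and
// higher register numbers fall back to DW_OP_regx / DW_OP_bregx with an
// explicit operand.
#include <cstdint>
#include <string>

std::string registerOp(unsigned DwarfReg) {
  if (DwarfReg < 32)
    return "DW_OP_reg" + std::to_string(DwarfReg); // value lives in the register
  return "DW_OP_regx " + std::to_string(DwarfReg);
}

std::string registerOffsetOp(unsigned DwarfReg, int64_t Offset) {
  if (DwarfReg < 32)
    return "DW_OP_breg" + std::to_string(DwarfReg) + " " + std::to_string(Offset);
  return "DW_OP_bregx " + std::to_string(DwarfReg) + " " + std::to_string(Offset);
}
// registerOffsetOp(6, -16), for example, describes "16 bytes below the value
// of DWARF register 6", the typical shape of a spilled variable's location.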
- void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); + void addType(DIE *Entity, DIType Ty, + dwarf::Attribute Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); @@ -307,10 +450,10 @@ public: DIE *getOrCreateTypeDIE(const MDNode *N); /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIScope Context); + DIE *createTypeDIE(DICompositeType Ty); - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(DIGlobalVariable GV); + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIScope Context); /// constructContainingTypeDIEs - Construct DIEs for types that contain /// vtables. @@ -319,20 +462,34 @@ public: /// constructVariableDIE - Construct a DIE for the given DbgVariable. DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); + /// constructSubprogramArguments - Construct function argument DIEs. + void constructSubprogramArguments(DIE &Buffer, DIArray Args); + /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. - DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); + DIE *createAndAddDIE(unsigned Tag, DIE &Parent, + DIDescriptor N = DIDescriptor()); /// Compute the size of a header for this unit, not including the initial /// length field. - unsigned getHeaderSize() const { + virtual unsigned getHeaderSize() const { return sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) } /// Emit the header for this unit, not including the initial length field. - void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym); + virtual void emitHeader(const MCSymbol *ASectionSym) const; + + virtual DwarfCompileUnit &getCU() = 0; + +protected: + /// getOrCreateStaticMemberDIE - Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); + + /// Look up the source ID with the given directory and source file names. If + /// none currently exists, create a new ID and insert it in the line table. + virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; private: /// constructTypeDIE - Construct basic type die from DIBasicType. @@ -366,12 +523,6 @@ private: void constructTemplateValueParameterDIE(DIE &Buffer, DITemplateValueParameter TVP); - /// getOrCreateStaticMemberDIE - Create new static data member DIE. - DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); - - /// Offset of the CUDie from beginning of debug info section. - unsigned DebugInfoOffset; - /// getLowerBoundDefault - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; @@ -403,7 +554,71 @@ private: template <typename T> T resolve(DIRef<T> Ref) const { return DD->resolve(Ref); } + + /// If this is a named finished type then include it in the list of types for + /// the accelerator tables. + void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE *TyDIE); }; +class DwarfCompileUnit : public DwarfUnit { + /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding + /// the need to search for it in applyStmtList. 
+ unsigned stmtListIndex; + +public: + DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + void initStmtList(MCSymbol *DwarfLineSectionSym); + + /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. + void applyStmtList(DIE &D); + + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(DIGlobalVariable GV); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addLocalLabelAddress - Add a dwarf label attribute data and value using + /// DW_FORM_addr only. + void addLocalLabelAddress(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + DwarfCompileUnit &getCU() override { return *this; } + + unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; +}; + +class DwarfTypeUnit : public DwarfUnit { +private: + uint64_t TypeSignature; + const DIE *Ty; + DwarfCompileUnit &CU; + MCDwarfDwoLineTable *SplitLineTable; + +public: + DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable = nullptr); + + void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } + uint64_t getTypeSignature() const { return TypeSignature; } + void setType(const DIE *Ty) { this->Ty = Ty; } + + /// Emit the header for this unit, not including the initial length field. + void emitHeader(const MCSymbol *ASectionSym) const override; + unsigned getHeaderSize() const override { + return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature + sizeof(uint32_t); // Type DIE Offset + } + void initSection(const MCSection *Section); + DwarfCompileUnit &getCU() override { return CU; } + +protected: + unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; +}; } // end llvm namespace #endif diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index a8fb66d..bfcbe6b 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -14,8 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/GCs.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" @@ -35,8 +35,8 @@ namespace { class ErlangGCPrinter : public GCMetadataPrinter { public: - void beginAssembly(AsmPrinter &AP); - void finishAssembly(AsmPrinter &AP); + void beginAssembly(AsmPrinter &AP) override; + void finishAssembly(AsmPrinter &AP) override; }; } diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 20b1f7b..bbdb0c7 100644 --- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis CodeGen Core MC MCParser Support Target +required_libraries = Analysis CodeGen Core MC MCParser Support Target TransformUtils diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 98177c0..5a9ecd7 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -16,6 +16,7 @@ #include 
"llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> @@ -33,8 +33,8 @@ namespace { class OcamlGCMetadataPrinter : public GCMetadataPrinter { public: - void beginAssembly(AsmPrinter &AP); - void finishAssembly(AsmPrinter &AP); + void beginAssembly(AsmPrinter &AP) override; + void finishAssembly(AsmPrinter &AP) override; }; } diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 1561012..17d8bff 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -30,7 +31,6 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -44,14 +44,14 @@ Win64Exception::Win64Exception(AsmPrinter *A) Win64Exception::~Win64Exception() {} -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void Win64Exception::EndModule() { +void Win64Exception::endModule() { } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void Win64Exception::BeginFunction(const MachineFunction *MF) { +void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. @@ -86,9 +86,9 @@ void Win64Exception::BeginFunction(const MachineFunction *MF) { Asm->getFunctionNumber())); } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. 
/// -void Win64Exception::EndFunction() { +void Win64Exception::endFunction(const MachineFunction *) { if (!shouldEmitPersonality && !shouldEmitMoves) return; @@ -101,7 +101,8 @@ void Win64Exception::EndFunction() { if (shouldEmitPersonality) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + const MCSymbol *Sym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.PushSection(); Asm->OutStreamer.EmitWin64EHHandlerData(); diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp new file mode 100644 index 0000000..50b2ca8 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -0,0 +1,336 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing line tables info into COFF files. +// +//===----------------------------------------------------------------------===// + +#include "WinCodeViewLineTables.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/COFF.h" + +namespace llvm { + +StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { + assert(S); + DIDescriptor D(S); + assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() || + D.isLexicalBlockFile() || D.isLexicalBlock()) && + "Unexpected scope info"); + + DIScope Scope(S); + StringRef Dir = Scope.getDirectory(), + Filename = Scope.getFilename(); + char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; + if (Result != 0) + return Result; + + // Clang emits directory and relative filename info into the IR, but CodeView + // operates on full paths. We could change Clang to emit full paths too, but + // that would increase the IR size and probably not needed for other users. + // For now, just concatenate and canonicalize the path here. + std::string Filepath; + if (Filename.find(':') == 1) + Filepath = Filename; + else + Filepath = (Dir + Twine("\\") + Filename).str(); + + // Canonicalize the path. We have to do it textually because we may no longer + // have access the file in the filesystem. + // First, replace all slashes with backslashes. + std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); + + // Remove all "\.\" with "\". + size_t Cursor = 0; + while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 2); + + // Replace all "\XXX\..\" with "\". Don't try too hard though as the original + // path should be well-formatted, e.g. start with a drive letter, etc. + Cursor = 0; + while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { + // Something's wrong if the path starts with "\..\", abort. + if (Cursor == 0) + break; + + size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); + if (PrevSlash == std::string::npos) + // Something's wrong, abort. + break; + + Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); + // The next ".." might be following the one we've just erased. + Cursor = PrevSlash; + } + + // Remove all duplicate backslashes. 
+ Cursor = 0; + while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 1); + + Result = strdup(Filepath.c_str()); + return StringRef(Result); +} + +void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, + const MachineFunction *MF) { + const MDNode *Scope = DL.getScope(MF->getFunction()->getContext()); + if (!Scope) + return; + StringRef Filename = getFullFilepath(Scope); + + // Skip this instruction if it has the same file:line as the previous one. + assert(CurFn); + if (!CurFn->Instrs.empty()) { + const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; + if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine()) + return; + } + FileNameRegistry.add(Filename); + + MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(MCL); + CurFn->Instrs.push_back(MCL); + InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine()); +} + +WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) + : Asm(0), CurFn(0) { + MachineModuleInfo *MMI = AP->MMI; + + // If module doesn't have named metadata anchors or COFF debug section + // is not available, skip any debug info related stuff. + if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || + !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) + return; + + // Tell MMI that we have debug info. + MMI->setDebugInfoAvailability(true); + Asm = AP; +} + +static void EmitLabelDiff(MCStreamer &Streamer, + const MCSymbol *From, const MCSymbol *To) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + MCContext &Context = Streamer.getContext(); + const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context), + *ToRef = MCSymbolRefExpr::Create(To, Variant, Context); + const MCExpr *AddrDelta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context); + Streamer.EmitValue(AddrDelta, 4); +} + +void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { + // For each function there is a separate subsection + // which holds the PC to file:line table. + const MCSymbol *Fn = Asm->getSymbol(GV); + assert(Fn); + + const FunctionInfo &FI = FnDebugInfo[GV]; + if (FI.Instrs.empty()) + return; + assert(FI.End && "Don't know where the function ends?"); + + // PCs/Instructions are grouped into segments sharing the same filename. + // Pre-calculate the lengths (in instructions) of these segments and store + // them in a map for convenience. Each index in the map is the sequential + // number of the respective instruction that starts a new segment. + DenseMap<size_t, size_t> FilenameSegmentLengths; + size_t LastSegmentEnd = 0; + StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename; + for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) { + if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename) + continue; + FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd; + LastSegmentEnd = J; + PrevFilename = InstrInfo[FI.Instrs[J]].Filename; + } + FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; + + // Emit the control code of the subsection followed by the payload size. 
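// getFullFilepath() above canonicalizes the path textually because the file
// may no longer be accessible in the filesystem. The same canonicalization
// steps, extracted into a self-contained helper over std::string
// (illustrative, not the LLVM code itself):
#include <algorithm>
#include <cstddef>
#include <string>

std::string canonicalizeWinPath(std::string Path) {
  // Use backslashes throughout.
  std::replace(Path.begin(), Path.end(), '/', '\\');

  // Collapse every "\.\" to "\".
  for (size_t C = 0; (C = Path.find("\\.\\", C)) != std::string::npos;)
    Path.erase(C, 2);

  // Collapse "\XXX\..\" to "\", bailing out on malformed input.
  for (size_t C = 0; (C = Path.find("\\..\\", C)) != std::string::npos;) {
    if (C == 0)
      break;                                   // path starts with "\..\"
    size_t PrevSlash = Path.rfind('\\', C - 1);
    if (PrevSlash == std::string::npos)
      break;                                   // no parent component, give up
    Path.erase(PrevSlash, C + 3 - PrevSlash);
    C = PrevSlash;                             // the next ".." may now follow
  }

  // Squash duplicated backslashes.
  for (size_t C = 0; (C = Path.find("\\\\", C)) != std::string::npos;)
    Path.erase(C, 1);

  return Path;
}
// canonicalizeWinPath("C:/a/./b/sub/../f.c") returns C:\a\b\f.c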
+ Asm->OutStreamer.AddComment( + "Linetable subsection for " + Twine(Fn->getName())); + Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); + MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(), + *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd); + Asm->OutStreamer.EmitLabel(SubsectionBegin); + + // Identify the function this subsection is for. + Asm->OutStreamer.EmitCOFFSecRel32(Fn); + Asm->OutStreamer.EmitCOFFSectionIndex(Fn); + + // Length of the function's code, in bytes. + EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); + + // PC-to-linenumber lookup table: + MCSymbol *FileSegmentEnd = 0; + for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { + MCSymbol *Instr = FI.Instrs[J]; + assert(InstrInfo.count(Instr)); + + if (FilenameSegmentLengths.count(J)) { + // We came to a beginning of a new filename segment. + if (FileSegmentEnd) + Asm->OutStreamer.EmitLabel(FileSegmentEnd); + StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; + assert(FileNameRegistry.Infos.count(CurFilename)); + size_t IndexInStringTable = + FileNameRegistry.Infos[CurFilename].FilenameID; + // Each segment starts with the offset of the filename + // in the string table. + Asm->OutStreamer.AddComment( + "Segment for file '" + Twine(CurFilename) + "' begins"); + MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(FileSegmentBegin); + Asm->EmitInt32(8 * IndexInStringTable); + + // Number of PC records in the lookup table. + size_t SegmentLength = FilenameSegmentLengths[J]; + Asm->EmitInt32(SegmentLength); + + // Full size of the segment for this filename, including the prev two + // records. + FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); + } + + // The first PC with the given linenumber and the linenumber itself. + EmitLabelDiff(Asm->OutStreamer, Fn, Instr); + Asm->EmitInt32(InstrInfo[Instr].LineNumber); + } + + if (FileSegmentEnd) + Asm->OutStreamer.EmitLabel(FileSegmentEnd); + Asm->OutStreamer.EmitLabel(SubsectionEnd); +} + +void WinCodeViewLineTables::endModule() { + if (FnDebugInfo.empty()) + return; + + assert(Asm != 0); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); + Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); + + // The COFF .debug$S section consists of several subsections, each starting + // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length + // of the payload followed by the payload itself. The subsections are 4-byte + // aligned. + + for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) + emitDebugInfoForFunction(VisitedFunctions[I]); + + // This subsection holds a file index to offset in string table table. + Asm->OutStreamer.AddComment("File index to string table offset subsection"); + Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); + size_t NumFilenames = FileNameRegistry.Infos.size(); + Asm->EmitInt32(8 * NumFilenames); + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + StringRef Filename = FileNameRegistry.Filenames[I]; + // For each unique filename, just write it's offset in the string table. + Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); + // The function name offset is not followed by any additional data. + Asm->EmitInt32(0); + } + + // This subsection holds the string table. 
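// emitDebugInfoForFunction() above pre-computes, for every run of consecutive
// instructions that share a filename, the index of the run's first instruction
// and the run's length. The same grouping over a plain vector of filenames
// (a stand-in for FI.Instrs/InstrInfo, names here are illustrative):
#include <cstddef>
#include <map>
#include <string>
#include <vector>

std::map<size_t, size_t> segmentLengths(const std::vector<std::string> &Files) {
  std::map<size_t, size_t> Lengths;
  if (Files.empty())
    return Lengths;
  size_t LastSegmentEnd = 0;
  for (size_t J = 1, F = Files.size(); J != F; ++J) {
    if (Files[J] == Files[LastSegmentEnd])
      continue;                                // still inside the current run
    Lengths[LastSegmentEnd] = J - LastSegmentEnd;
    LastSegmentEnd = J;                        // a new filename segment starts
  }
  Lengths[LastSegmentEnd] = Files.size() - LastSegmentEnd;
  return Lengths;
}
// {"a.c", "a.c", "b.c", "a.c"} maps to { {0, 2}, {2, 1}, {3, 1} }.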
+ Asm->OutStreamer.AddComment("String table"); + Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); + Asm->EmitInt32(FileNameRegistry.LastOffset); + // The payload starts with a null character. + Asm->EmitInt8(0); + + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + // Just emit unique filenames one by one, separated by a null character. + Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]); + Asm->EmitInt8(0); + } + + // No more subsections. Fill with zeros to align the end of the section by 4. + Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0); + + clear(); +} + +void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { + assert(!CurFn && "Can't process two functions at once!"); + + if (!Asm || !Asm->MMI->hasDebugInfo()) + return; + + const Function *GV = MF->getFunction(); + assert(FnDebugInfo.count(GV) == false); + VisitedFunctions.push_back(GV); + CurFn = &FnDebugInfo[GV]; + + // Find the end of the function prolog. + // FIXME: is there a simpler a way to do this? Can we just search + // for the first instruction of the function, not the last of the prolog? + DebugLoc PrologEndLoc; + bool EmptyPrologue = true; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E && PrologEndLoc.isUnknown(); ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MI = II; + if (MI->isDebugValue()) + continue; + + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + // FIXME: do we need the first subcondition? + if (!MI->getFlag(MachineInstr::FrameSetup) && + (!MI->getDebugLoc().isUnknown())) { + PrologEndLoc = MI->getDebugLoc(); + break; + } + EmptyPrologue = false; + } + } + // Record beginning of function if we have a non-empty prologue. + if (!PrologEndLoc.isUnknown() && !EmptyPrologue) { + DebugLoc FnStartDL = + PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); + maybeRecordLocation(FnStartDL, MF); + } +} + +void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { + if (!Asm || !CurFn) // We haven't created any debug info for this function. + return; + + const Function *GV = MF->getFunction(); + assert(FnDebugInfo.count(GV) == true); + assert(CurFn == &FnDebugInfo[GV]); + + if (CurFn->Instrs.empty()) { + FnDebugInfo.erase(GV); + VisitedFunctions.pop_back(); + } else { + // Define end label for subprogram. + MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(FunctionEndSym); + CurFn->End = FunctionEndSym; + } + CurFn = 0; +} + +void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { + // Ignore DBG_VALUE locations and function prologue. + if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) + return; + DebugLoc DL = MI->getDebugLoc(); + if (DL == PrevInstLoc || DL.isUnknown()) + return; + maybeRecordLocation(DL, Asm->MF); +} +} diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h new file mode 100644 index 0000000..a7a6205 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -0,0 +1,144 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing line tables info into COFF files. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ +#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ + +#include "AsmPrinterHandler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { +/// \brief Collects and handles line tables information in a CodeView format. +class WinCodeViewLineTables : public AsmPrinterHandler { + AsmPrinter *Asm; + DebugLoc PrevInstLoc; + + // For each function, store a vector of labels to its instructions, as well as + // to the end of the function. + struct FunctionInfo { + SmallVector<MCSymbol *, 10> Instrs; + MCSymbol *End; + FunctionInfo() : End(0) {} + } *CurFn; + + typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy; + FnDebugInfoTy FnDebugInfo; + // Store the functions we've visited in a vector so we can maintain a stable + // order while emitting subsections. + SmallVector<const Function *, 10> VisitedFunctions; + + // InstrInfoTy - Holds the Filename:LineNumber information for every + // instruction with a unique debug location. + struct InstrInfoTy { + StringRef Filename; + unsigned LineNumber; + + InstrInfoTy() : LineNumber(0) {} + + InstrInfoTy(StringRef Filename, unsigned LineNumber) + : Filename(Filename), LineNumber(LineNumber) {} + }; + DenseMap<MCSymbol *, InstrInfoTy> InstrInfo; + + // FileNameRegistry - Manages filenames observed while generating debug info + // by filtering out duplicates and bookkeeping the offsets in the string + // table to be generated. + struct FileNameRegistryTy { + SmallVector<StringRef, 10> Filenames; + struct PerFileInfo { + size_t FilenameID, StartOffset; + }; + StringMap<PerFileInfo> Infos; + + // The offset in the string table where we'll write the next unique + // filename. + size_t LastOffset; + + FileNameRegistryTy() { + clear(); + } + + // Add Filename to the registry, if it was not observed before. 
+ void add(StringRef Filename) { + if (Infos.count(Filename)) + return; + size_t OldSize = Infos.size(); + Infos[Filename].FilenameID = OldSize; + Infos[Filename].StartOffset = LastOffset; + LastOffset += Filename.size() + 1; + Filenames.push_back(Filename); + } + + void clear() { + LastOffset = 1; + Infos.clear(); + Filenames.clear(); + } + } FileNameRegistry; + + typedef std::map<std::pair<StringRef, StringRef>, char *> + DirAndFilenameToFilepathMapTy; + DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; + StringRef getFullFilepath(const MDNode *S); + + void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF); + + void clear() { + assert(CurFn == 0); + FileNameRegistry.clear(); + InstrInfo.clear(); + } + + void emitDebugInfoForFunction(const Function *GV); + +public: + WinCodeViewLineTables(AsmPrinter *Asm); + + ~WinCodeViewLineTables() { + for (DirAndFilenameToFilepathMapTy::iterator + I = DirAndFilenameToFilepathMap.begin(), + E = DirAndFilenameToFilepathMap.end(); + I != E; ++I) + free(I->second); + } + + void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + + /// \brief Emit the COFF section that holds the line table information. + void endModule() override; + + /// \brief Gather pre-function debug information. + void beginFunction(const MachineFunction *MF) override; + + /// \brief Gather post-function debug information. + void endFunction(const MachineFunction *) override; + + /// \brief Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override; + + /// \brief Process end of an instruction. + void endInstruction() override {} +}; +} // End of namespace llvm + +#endif diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 24aa1ab..c6654ec2 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -20,12 +20,11 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Target/TargetLowering.h" #include <utility> - using namespace llvm; namespace { -class BasicTTI : public ImmutablePass, public TargetTransformInfo { +class BasicTTI final : public ImmutablePass, public TargetTransformInfo { const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract @@ -43,15 +42,11 @@ public: initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + void initializePass() override { pushTTIStack(this); } - virtual void finalizePass() { - popTTIStack(); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -59,61 +54,61 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. 
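// FileNameRegistryTy above assigns each unique filename a sequential ID and
// its byte offset in the string table that endModule() emits later; LastOffset
// starts at 1 because the emitted table begins with a single null byte. A
// stripped-down model of the same bookkeeping (std::map instead of StringMap,
// purely illustrative):
#include <cstddef>
#include <map>
#include <string>
#include <utility>

struct StringTable {
  std::map<std::string, std::pair<size_t, size_t>> Info; // name -> {ID, offset}
  size_t LastOffset = 1;                                  // byte 0 is '\0'

  void add(const std::string &Name) {
    if (Info.count(Name))
      return;                                             // already registered
    size_t ID = Info.size();
    Info[Name] = {ID, LastOffset};
    LastOffset += Name.size() + 1;                        // trailing '\0'
  }
};
// After add("a.c"); add("b.c"); add("a.c"); the offsets are a.c -> 1 and
// b.c -> 5, and the emitted payload is "\0a.c\0b.c\0" (9 bytes = LastOffset).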
- virtual void *getAdjustedAnalysisPointer(const void *ID) { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; } - virtual bool hasBranchDivergence() const; + bool hasBranchDivergence() const override; /// \name Scalar TTI Implementations /// @{ - virtual bool isLegalAddImmediate(int64_t imm) const; - virtual bool isLegalICmpImmediate(int64_t imm) const; - virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const; - virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const; - virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; - virtual bool isTypeLegal(Type *Ty) const; - virtual unsigned getJumpBufAlignment() const; - virtual unsigned getJumpBufSize() const; - virtual bool shouldBuildLookupTables() const; - virtual bool haveFastSqrt(Type *Ty) const; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + bool isLegalAddImmediate(int64_t imm) const override; + bool isLegalICmpImmediate(int64_t imm) const override; + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const override; + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const override; + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTypeLegal(Type *Ty) const override; + unsigned getJumpBufAlignment() const override; + unsigned getJumpBufSize() const override; + bool shouldBuildLookupTables() const override; + bool haveFastSqrt(Type *Ty) const override; + void getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const override; /// @} /// \name Vector TTI Implementations /// @{ - virtual unsigned getNumberOfRegisters(bool Vector) const; - virtual unsigned getMaximumUnrollFactor() const; - virtual unsigned getRegisterBitWidth(bool Vector) const; - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, - OperandValueKind) const; - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const; - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; - virtual unsigned getCFInstrCost(unsigned Opcode) const; - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const; - virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef<Type*> Tys) const; - virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; - virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const; + unsigned getNumberOfRegisters(bool Vector) const override; + unsigned getMaximumUnrollFactor() const override; + unsigned getRegisterBitWidth(bool Vector) const override; + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, + OperandValueKind) const override; + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const override; + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const override; + unsigned getCFInstrCost(unsigned Opcode) const override; + 
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const override; + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const override; + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const override; + unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, + ArrayRef<Type*> Tys) const override; + unsigned getNumberOfParts(Type *Tp) const override; + unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override; + unsigned getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const override; /// @} }; @@ -302,7 +297,8 @@ unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return 0; // If the cast is marked as legal (or promote) then assume low cost. - if (TLI->isOperationLegalOrPromote(ISD, DstLT.second)) + if (SrcLT.first == DstLT.first && + TLI->isOperationLegalOrPromote(ISD, DstLT.second)) return 1; // Handle scalar conversions. @@ -409,7 +405,9 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { - return 1; + std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); + + return LT.first; } unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, @@ -418,8 +416,30 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, assert(!Src->isVoidTy() && "Invalid type"); std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); - // Assume that all loads of legal types cost 1. - return LT.first; + // Assuming that all loads of legal types cost 1. + unsigned Cost = LT.first; + + if (Src->isVectorTy() && + Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { + // This is a vector load that legalizes to a larger type than the vector + // itself. Unless the corresponding extending load or truncating store is + // legal, then this will scalarize. + TargetLowering::LegalizeAction LA; + MVT MemVT = getTLI()->getSimpleValueType(Src, true); + if (Opcode == Instruction::Store) + LA = getTLI()->getTruncStoreAction(LT.second, MemVT); + else + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT); + + if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { + // This is a vector load/store for some illegal type that is scalarized. + // We must account for the cost of building or decomposing the vector. 
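// For example (illustrative only): a load of <4 x i8> may legalize to a wider
// v4i32 operation; if the matching extending load is neither Legal nor Custom,
// the operation is scalarized, and the cost of inserting the four loaded
// elements into the result vector is added on top of the base cost LT.first.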
+ Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, + Opcode == Instruction::Store); + } + } + + return Cost; } unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 9cd4208..b39777e 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -66,9 +66,9 @@ namespace { static char ID; explicit BranchFolderPass(): MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -82,8 +82,15 @@ INITIALIZE_PASS(BranchFolderPass, "branch-folder", "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); - BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + // TailMerge can create jump into if branches that make CFG irreducible for + // HW that requires structurized CFG. + bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && + PassConfig->getEnableTailMerge(); + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true); return Folder.OptimizeFunction(MF, MF.getTarget().getInstrInfo(), MF.getTarget().getRegisterInfo(), @@ -379,7 +386,7 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, if (RS) { RS->enterBasicBlock(CurMBB); if (!CurMBB->empty()) - RS->forward(prior(CurMBB->end())); + RS->forward(std::prev(CurMBB->end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) @@ -458,7 +465,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); - MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); + MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; DebugLoc dl; // FIXME: this is nowhere @@ -596,12 +603,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, unsigned maxCommonTailLength = 0U; SameTails.clear(); MachineBasicBlock::iterator TrialBBI1, TrialBBI2; - MPIterator HighestMPIter = prior(MergePotentials.end()); - for (MPIterator CurMPIter = prior(MergePotentials.end()), + MPIterator HighestMPIter = std::prev(MergePotentials.end()); + for (MPIterator CurMPIter = std::prev(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter != B && CurMPIter->getHash() == CurHash; - --CurMPIter) { - for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { + CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) { + for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) { unsigned CommonTailLen; if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), minCommonTailLength, @@ -630,9 +636,9 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { MPIterator CurMPIter, B; - for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter->getHash() == 
CurHash; - --CurMPIter) { + for (CurMPIter = std::prev(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; --CurMPIter) { // Put the unconditional branch back, if we need one. MachineBasicBlock *CurMBB = CurMPIter->getBlock(); if (SuccBB && CurMBB != PredBB) @@ -864,12 +870,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ++I) { if (I->pred_size() < 2) continue; SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = prior(I); + MachineBasicBlock *PredBB = std::prev(I); MergePotentials.clear(); for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); @@ -901,7 +907,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // This is the QBB case described above if (!FBB) - FBB = llvm::next(MachineFunction::iterator(PBB)); + FBB = std::next(MachineFunction::iterator(PBB)); } // Failing case: the only way IBB can be reached from PBB is via @@ -951,7 +957,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -970,7 +976,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; MadeChange |= OptimizeBlock(MBB); @@ -1091,7 +1097,7 @@ ReoptimizeBlock: // Check to see if we can simplify the terminator of the block before this // one. - MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB)); + MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; @@ -1390,7 +1396,8 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = + std::next(MachineFunction::iterator(MBB)); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } @@ -1511,7 +1518,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // branch from condition setting instruction. 
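// The loop below must test the moving iterator PI, not the fixed starting
// point Loc; otherwise debug values would never actually be skipped while
// walking backwards.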
MachineBasicBlock::iterator PI = Loc; --PI; - while (PI != MBB->begin() && Loc->isDebugValue()) + while (PI != MBB->begin() && PI->isDebugValue()) --PI; bool IsDef = false; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 10cc9ff..8943cb1 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_library(LLVMCodeGen CalcSpillWeights.cpp CallingConvLower.cpp CodeGen.cpp + CodeGenPrepare.cpp CriticalAntiDepBreaker.cpp DFAPacketizer.cpp DeadMachineInstructionElim.cpp @@ -35,7 +36,7 @@ add_llvm_library(LLVMCodeGen LiveRangeCalc.cpp LiveRangeEdit.cpp LiveRegMatrix.cpp - LiveRegUnits.cpp + LivePhysRegs.cpp LiveStackAnalysis.cpp LiveVariables.cpp LocalStackSlotAllocation.cpp @@ -97,6 +98,7 @@ add_llvm_library(LLVMCodeGen StackColoring.cpp StackProtector.cpp StackSlotColoring.cpp + StackMapLivenessAnalysis.cpp StackMaps.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 4925c4d..4833731 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -112,8 +112,10 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); - for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); - MachineInstr *mi = I.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); + I != E; ) { + MachineInstr *mi = &*(I++); if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; if (!visited.insert(mi)) @@ -130,9 +132,9 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Calculate instr weight. bool reads, writes; - tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); weight = LiveIntervals::getSpillWeight( - writes, reads, MBFI.getBlockFreq(mi->getParent())); + writes, reads, &MBFI, mi); // Give extra weight to what looks like a loop induction variable update. 
if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 7430c53..17402f0 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" using namespace llvm; @@ -22,6 +22,7 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); + initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); @@ -51,6 +52,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); + initializePostMachineSchedulerPass(Registry); initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); @@ -69,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMachineFunctionPrinterPassPass(Registry); + initializeStackMapLivenessPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp new file mode 100644 index 0000000..e82a306 --- /dev/null +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -0,0 +1,3011 @@ +//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass munges the code in the input function to better prepare it for +// SelectionDAG-based code generation. This works around limitations in it's +// basic-block-at-a-time approach. It should eventually be removed. 
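// Typical transformations performed here include sinking address computations,
// compares, and casts into the blocks that use them, and duplicating return
// instructions into predecessors to expose tail calls.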
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegenprepare" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +STATISTIC(NumBlocksElim, "Number of blocks eliminated"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " + "sunken Cmps"); +STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " + "of sunken Casts"); +STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " + "computations were sunk"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); +STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); +STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); + +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); + +static cl::opt<bool> DisableSelectToBranch( + "disable-cgp-select2branch", cl::Hidden, cl::init(false), + cl::desc("Disable select to branch conversion.")); + +static cl::opt<bool> EnableAndCmpSinking( + "enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinkinig and/cmp into branches.")); + +namespace { +typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; +typedef DenseMap<Instruction *, Type *> InstrToOrigTy; + + class CodeGenPrepare : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. + const TargetMachine *TM; + const TargetLowering *TLI; + const TargetLibraryInfo *TLInfo; + DominatorTree *DT; + + /// CurInstIterator - As we scan instructions optimizing them, this is the + /// next instruction to optimize. Xforms that can invalidate this should + /// update it. + BasicBlock::iterator CurInstIterator; + + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. 
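/// For example, when two memory instructions in one block use the same address
/// value defined elsewhere, the sunk address computation is emitted once and
/// simply looked up here for the second instruction.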
+ ValueMap<Value*, Value*> SunkAddrs; + + /// Keeps track of all truncates inserted for the current function. + SetOfInstrs InsertedTruncsSet; + /// Keeps track of the type of the related instruction before their + /// promotion for the current function. + InstrToOrigTy PromotedInsts; + + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. + bool ModifiedDT; + + /// OptSize - True if optimizing for size. + bool OptSize; + + public: + static char ID; // Pass identification, replacement for typeid + explicit CodeGenPrepare(const TargetMachine *TM = 0) + : FunctionPass(ID), TM(TM), TLI(0) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { return "CodeGen Prepare"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfo>(); + } + + private: + bool EliminateFallThrough(Function &F); + bool EliminateMostlyEmptyBlocks(Function &F); + bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void EliminateMostlyEmptyBlock(BasicBlock *BB); + bool OptimizeBlock(BasicBlock &BB); + bool OptimizeInst(Instruction *I); + bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); + bool OptimizeInlineAsmInst(CallInst *CS); + bool OptimizeCallInst(CallInst *CI); + bool MoveExtToFormExtLoad(Instruction *I); + bool OptimizeExtUses(Instruction *I); + bool OptimizeSelectInst(SelectInst *SI); + bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool DupRetToEnableTailCallOpts(BasicBlock *BB); + bool PlaceDbgValues(Function &F); + bool sinkAndCmp(Function &F); + }; +} + +char CodeGenPrepare::ID = 0; +static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { + initializeTargetLibraryInfoPass(Registry); + PassInfo *PI = new PassInfo( + "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, + PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false, + PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>)); + Registry.registerPass(*PI, true); + return PI; +} + +void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) +} + +FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { + return new CodeGenPrepare(TM); +} + +bool CodeGenPrepare::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + bool EverMadeChange = false; + // Clear per function information. + InsertedTruncsSet.clear(); + PromotedInsts.clear(); + + ModifiedDT = false; + if (TM) TLI = TM->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfo>(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : 0; + OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); + + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { + const DenseMap<unsigned int, unsigned int> &BypassWidths = + TLI->getBypassSlowDivWidths(); + for (Function::iterator I = F.begin(); I != F.end(); I++) + EverMadeChange |= bypassSlowDivision(F, I, BypassWidths); + } + + // Eliminate blocks that contain only PHI nodes and an + // unconditional branch. 
+ EverMadeChange |= EliminateMostlyEmptyBlocks(F); + + // llvm.dbg.value is far away from the value then iSel may not be able + // handle it properly. iSel will drop llvm.dbg.value if it can not + // find a node corresponding to the value. + EverMadeChange |= PlaceDbgValues(F); + + // If there is a mask, compare against zero, and branch that can be combined + // into a single target instruction, push the mask and compare into branch + // users. Do this before OptimizeBlock -> OptimizeInst -> + // OptimizeCmpExpression, which perturbs the pattern being searched for. + if (!DisableBranchOpts) + EverMadeChange |= sinkAndCmp(F); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end(); ) { + BasicBlock *BB = I++; + MadeChange |= OptimizeBlock(*BB); + } + EverMadeChange |= MadeChange; + } + + SunkAddrs.clear(); + + if (!DisableBranchOpts) { + MadeChange = false; + SmallPtrSet<BasicBlock*, 8> WorkList; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + MadeChange |= ConstantFoldTerminator(BB, true); + if (!MadeChange) continue; + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } + + // Delete the dead blocks and any of their dead successors. + MadeChange |= !WorkList.empty(); + while (!WorkList.empty()) { + BasicBlock *BB = *WorkList.begin(); + WorkList.erase(BB); + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + + DeleteDeadBlock(BB); + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->recalculate(F); + + return EverMadeChange; +} + +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + // Don't merge if BB's address is taken. + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. 
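// (SinglePred has been folded into BB and deleted, so resume scanning at BB.)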
+ I = BB; + } + } + return Changed; +} + +/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, +/// debug info directives, and an unconditional branch. Passes before isel +/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for +/// isel. Start by eliminating these blocks so we can split them the way we +/// want them. +bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { + bool MadeChange = false; + // Note that this intentionally skips the entry block. + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { + BasicBlock *BB = I++; + + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + continue; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. + BasicBlock::iterator BBI = BI; + if (BBI != BB->begin()) { + --BBI; + while (isa<DbgInfoIntrinsic>(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) + continue; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + continue; + + if (!CanMergeBlocks(BB, DestBB)) + continue; + + EliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a +/// single uncond branch between them, and BB contains no other non-phi +/// instructions. +bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, + const BasicBlock *DestBB) const { + // We only want to eliminate blocks whose phi nodes are used by phi nodes in + // the successor. If there are more complex condition (e.g. preheaders), + // don't mess around with them. + BasicBlock::const_iterator BBI = BB->begin(); + while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { + for (const User *U : PN->users()) { + const Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != DestBB || !isa<PHINode>(UI)) + return false; + // If User is inside DestBB block and it is a PHINode then check + // incoming value. If incoming value is not from BB then this is + // a complex condition (e.g. preheaders) we want to avoid here. + if (UI->getParent() == DestBB) { + if (const PHINode *UPN = dyn_cast<PHINode>(UI)) + for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { + Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); + if (Insn && Insn->getParent() == BB && + Insn->getParent() != UPN->getIncomingBlock(I)) + return false; + } + } + } + } + + // If BB and DestBB contain any common predecessors, then the phi nodes in BB + // and DestBB may have conflicting incoming values for the block. If so, we + // can't merge the block. + const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); + if (!DestBBPN) return true; // no conflict. + + // Collect the preds of BB. + SmallPtrSet<const BasicBlock*, 16> BBPreds; + if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { + // It is faster to get preds from a PHI than with pred_iterator. + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + BBPreds.insert(BBPN->getIncomingBlock(i)); + } else { + BBPreds.insert(pred_begin(BB), pred_end(BB)); + } + + // Walk the preds of DestBB. 
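// For example, if some block P is a predecessor of both BB and DestBB, a phi
// in DestBB may receive different values along the P->BB->DestBB path and the
// direct P->DestBB edge; such a conflict makes the merge impossible.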
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = DestBBPN->getIncomingBlock(i); + if (BBPreds.count(Pred)) { // Common predecessor? + BBI = DestBB->begin(); + while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { + const Value *V1 = PN->getIncomingValueForBlock(Pred); + const Value *V2 = PN->getIncomingValueForBlock(BB); + + // If V2 is a phi node in BB, look up what the mapped value will be. + if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) + if (V2PN->getParent() == BB) + V2 = V2PN->getIncomingValueForBlock(Pred); + + // If there is a conflict, bail out. + if (V1 != V2) return false; + } + } + } + + return true; +} + + +/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and +/// an unconditional branch in it. +void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + BasicBlock *DestBB = BI->getSuccessor(0); + + DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); + + // If the destination block has a single pred, then this is a trivial edge, + // just collapse it. + if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { + if (SinglePred != DestBB) { + // Remember if SinglePred was the entry block of the function. If so, we + // will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(DestBB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); + return; + } + } + + // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB + // to handle the new incoming edges it is about to have. + PHINode *PN; + for (BasicBlock::iterator BBI = DestBB->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + // Remove the incoming value for BB, and remember it. + Value *InVal = PN->removeIncomingValue(BB, false); + + // Two options: either the InVal is a phi node defined in BB or it is some + // value that dominates BB. + PHINode *InValPhi = dyn_cast<PHINode>(InVal); + if (InValPhi && InValPhi->getParent() == BB) { + // Add all of the input values of the input PHI as inputs of this phi. + for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InValPhi->getIncomingValue(i), + InValPhi->getIncomingBlock(i)); + } else { + // Otherwise, add one instance of the dominating value for each edge that + // we will be adding. + if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + PN->addIncoming(InVal, *PI); + } + } + } + + // The PHIs are now updated, change everything that refers to BB to use + // DestBB and remove BB. 
+ BB->replaceAllUsesWith(DestBB); + if (DT && !ModifiedDT) { + BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); + BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); + BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); + DT->changeImmediateDominator(DestBB, NewIDom); + DT->eraseNode(BB); + } + BB->eraseFromParent(); + ++NumBlocksElim; + + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); +} + +/// SinkCast - Sink the specified cast instruction into its user blocks +static bool SinkCast(CastInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCasts - Only insert a cast in each block once. + DenseMap<BasicBlock*, CastInst*> InsertedCasts; + + bool MadeChange = false; + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + + // Figure out which BB this cast is used in. For PHI's this is the + // appropriate predecessor block. + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) { + UserBB = PN->getIncomingBlock(TheUse); + } + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // If this user is in the same block as the cast, don't change the cast. + if (UserBB == DefBB) continue; + + // If we have already inserted a cast into this block, use it. + CastInst *&InsertedCast = InsertedCasts[UserBB]; + + if (!InsertedCast) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedCast = + CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", + InsertPt); + MadeChange = true; + } + + // Replace a use of the cast with a use of the new cast. + TheUse = InsertedCast; + ++NumCastUses; + } + + // If we removed all uses, nuke the cast. + if (CI->use_empty()) { + CI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop +/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), +/// sink it into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. +/// +/// Return true if any changes are made. +/// +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ + // If this is a noop copy, + EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(CI->getType()); + + // This is an fp<->int conversion? + if (SrcVT.isInteger() != DstVT.isInteger()) + return false; + + // If this is an extension, it will be a zero or sign extension, which + // isn't a noop. + if (SrcVT.bitsLT(DstVT)) return false; + + // If these values will be promoted, find out what they will be promoted + // to. This helps us consider truncates on PPC as noop copies when they + // are. + if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); + + // If, after promotion, these are the same types, this is a noop copy. + if (SrcVT != DstVT) + return false; + + return SinkCast(CI); +} + +/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// the number of virtual registers that must be created and coalesced. 
This is +/// a clear win except on targets with multiple condition code registers +/// (PowerPC), where it might lose; some adjustment may be wanted there. +/// +/// Return true if any changes are made. +static bool OptimizeCmpExpression(CmpInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCmp - Only insert a cmp in each block once. + DenseMap<BasicBlock*, CmpInst*> InsertedCmps; + + bool MadeChange = false; + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa<PHINode>(User)) + continue; + + // Figure out which BB this cmp is used in. + BasicBlock *UserBB = User->getParent(); + + // If this user is in the same block as the cmp, don't change the cmp. + if (UserBB == DefBB) continue; + + // If we have already inserted a cmp into this block, use it. + CmpInst *&InsertedCmp = InsertedCmps[UserBB]; + + if (!InsertedCmp) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedCmp = + CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), CI->getOperand(0), + CI->getOperand(1), "", InsertPt); + MadeChange = true; + } + + // Replace a use of the cmp with a use of the new cmp. + TheUse = InsertedCmp; + ++NumCmpUses; + } + + // If we removed all uses, nuke the cmp. + if (CI->use_empty()) + CI->eraseFromParent(); + + return MadeChange; +} + +namespace { +class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls { +protected: + void replaceCall(Value *With) override { + CI->replaceAllUsesWith(With); + CI->eraseFromParent(); + } + bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); + return false; + } +}; +} // end anonymous namespace + +bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { + BasicBlock *BB = CI->getParent(); + + // Lower inline assembly if we can. + // If we found an inline asm expession, and if the target knows how to + // lower it to normal LLVM code, do so now. + if (TLI && isa<InlineAsm>(CI->getCalledValue())) { + if (TLI->ExpandInlineAsm(CI)) { + // Avoid invalidating the iterator. + CurInstIterator = BB->begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + return true; + } + // Sink address computing for memory operands into the block. + if (OptimizeInlineAsmInst(CI)) + return true; + } + + // Lower all uses of llvm.objectsize.* + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); + if (II && II->getIntrinsicID() == Intrinsic::objectsize) { + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + Type *ReturnTy = CI->getType(); + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + + // Substituting this can cause recursive simplifications, which can + // invalidate our iterator. Use a WeakVH to hold onto it in case this + // happens. + WeakVH IterHandle(CurInstIterator); + + replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0, + TLInfo, ModifiedDT ? 0 : DT); + + // If the iterator instruction was recursively deleted, start over at the + // start of the block. 
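// (The WeakVH stops matching CurInstIterator if the instruction it guards was
// replaced or deleted during the recursive simplification.)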
+ if (IterHandle != CurInstIterator) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + return true; + } + + if (II && TLI) { + SmallVector<Value*, 2> PtrOps; + Type *AccessTy; + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + return true; + } + + // From here on out we're working with named functions. + if (CI->getCalledFunction() == 0) return false; + + // We'll need DataLayout from here on out. + const DataLayout *TD = TLI ? TLI->getDataLayout() : 0; + if (!TD) return false; + + // Lower all default uses of _chk calls. This is very similar + // to what InstCombineCalls does, but here we are only lowering calls + // that have the default "don't know" as the objectsize. Anything else + // should be left alone. + CodeGenPrepareFortifiedLibCalls Simplifier; + return Simplifier.fold(CI, TD, TLInfo); +} + +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. The +/// case it is currently looking for is: +/// @code +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// @endcode +/// +/// => +/// +/// @code +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// @endcode +bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { + if (!TLI) + return false; + + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) + return false; + + PHINode *PN = 0; + BitCastInst *BCI = 0; + Value *V = RI->getReturnValue(); + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } + + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). + const Function *F = BB->getParent(); + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. 
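// (The call must live in the predecessor feeding this phi operand and have the
// phi as its only use to be a candidate.)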
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias)) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Memory Optimization +//===----------------------------------------------------------------------===// + +namespace { + +/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// which holds actual Value*'s for register values. +struct ExtAddrMode : public TargetLowering::AddrMode { + Value *BaseReg; + Value *ScaledReg; + ExtAddrMode() : BaseReg(0), ScaledReg(0) {} + void print(raw_ostream &OS) const; + void dump() const; + + bool operator==(const ExtAddrMode& O) const { + return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && + (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && + (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + } +}; + +#ifndef NDEBUG +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { + AM.print(OS); + return OS; +} +#endif + +void ExtAddrMode::print(raw_ostream &OS) const { + bool NeedPlus = false; + OS << "["; + if (BaseGV) { + OS << (NeedPlus ? " + " : "") + << "GV:"; + BaseGV->printAsOperand(OS, /*PrintType=*/false); + NeedPlus = true; + } + + if (BaseOffs) + OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + + if (BaseReg) { + OS << (NeedPlus ? " + " : "") + << "Base:"; + BaseReg->printAsOperand(OS, /*PrintType=*/false); + NeedPlus = true; + } + if (Scale) { + OS << (NeedPlus ? 
" + " : "") + << Scale << "*"; + ScaledReg->printAsOperand(OS, /*PrintType=*/false); + } + + OS << ']'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ExtAddrMode::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +/// \brief This class provides transaction based operation on the IR. +/// Every change made through this class is recorded in the internal state and +/// can be undone (rollback) until commit is called. +class TypePromotionTransaction { + + /// \brief This represents the common interface of the individual transaction. + /// Each class implements the logic for doing one specific modification on + /// the IR via the TypePromotionTransaction. + class TypePromotionAction { + protected: + /// The Instruction modified. + Instruction *Inst; + + public: + /// \brief Constructor of the action. + /// The constructor performs the related action on the IR. + TypePromotionAction(Instruction *Inst) : Inst(Inst) {} + + virtual ~TypePromotionAction() {} + + /// \brief Undo the modification done by this action. + /// When this method is called, the IR must be in the same state as it was + /// before this action was applied. + /// \pre Undoing the action works if and only if the IR is in the exact same + /// state as it was directly after this action was applied. + virtual void undo() = 0; + + /// \brief Advocate every change made by this action. + /// When the results on the IR of the action are to be kept, it is important + /// to call this function, otherwise hidden information may be kept forever. + virtual void commit() { + // Nothing to be done, this action is not doing anything. + } + }; + + /// \brief Utility to remember the position of an instruction. + class InsertionHandler { + /// Position of an instruction. + /// Either an instruction: + /// - Is the first in a basic block: BB is used. + /// - Has a previous instructon: PrevInst is used. + union { + Instruction *PrevInst; + BasicBlock *BB; + } Point; + /// Remember whether or not the instruction had a previous instruction. + bool HasPrevInstruction; + + public: + /// \brief Record the position of \p Inst. + InsertionHandler(Instruction *Inst) { + BasicBlock::iterator It = Inst; + HasPrevInstruction = (It != (Inst->getParent()->begin())); + if (HasPrevInstruction) + Point.PrevInst = --It; + else + Point.BB = Inst->getParent(); + } + + /// \brief Insert \p Inst at the recorded position. + void insert(Instruction *Inst) { + if (HasPrevInstruction) { + if (Inst->getParent()) + Inst->removeFromParent(); + Inst->insertAfter(Point.PrevInst); + } else { + Instruction *Position = Point.BB->getFirstInsertionPt(); + if (Inst->getParent()) + Inst->moveBefore(Position); + else + Inst->insertBefore(Position); + } + } + }; + + /// \brief Move an instruction before another. + class InstructionMoveBefore : public TypePromotionAction { + /// Original position of the instruction. + InsertionHandler Position; + + public: + /// \brief Move \p Inst before \p Before. + InstructionMoveBefore(Instruction *Inst, Instruction *Before) + : TypePromotionAction(Inst), Position(Inst) { + DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); + Inst->moveBefore(Before); + } + + /// \brief Move the instruction back to its original position. + void undo() override { + DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); + Position.insert(Inst); + } + }; + + /// \brief Set the operand of an instruction with a new value. + class OperandSetter : public TypePromotionAction { + /// Original operand of the instruction. 
+ Value *Origin; + /// Index of the modified instruction. + unsigned Idx; + + public: + /// \brief Set \p Idx operand of \p Inst with \p NewVal. + OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) + : TypePromotionAction(Inst), Idx(Idx) { + DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" + << "for:" << *Inst << "\n" + << "with:" << *NewVal << "\n"); + Origin = Inst->getOperand(Idx); + Inst->setOperand(Idx, NewVal); + } + + /// \brief Restore the original value of the instruction. + void undo() override { + DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" + << "for: " << *Inst << "\n" + << "with: " << *Origin << "\n"); + Inst->setOperand(Idx, Origin); + } + }; + + /// \brief Hide the operands of an instruction. + /// Do as if this instruction was not using any of its operands. + class OperandsHider : public TypePromotionAction { + /// The list of original operands. + SmallVector<Value *, 4> OriginalValues; + + public: + /// \brief Remove \p Inst from the uses of the operands of \p Inst. + OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { + DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); + unsigned NumOpnds = Inst->getNumOperands(); + OriginalValues.reserve(NumOpnds); + for (unsigned It = 0; It < NumOpnds; ++It) { + // Save the current operand. + Value *Val = Inst->getOperand(It); + OriginalValues.push_back(Val); + // Set a dummy one. + // We could use OperandSetter here, but that would implied an overhead + // that we are not willing to pay. + Inst->setOperand(It, UndefValue::get(Val->getType())); + } + } + + /// \brief Restore the original list of uses. + void undo() override { + DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); + for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) + Inst->setOperand(It, OriginalValues[It]); + } + }; + + /// \brief Build a truncate instruction. + class TruncBuilder : public TypePromotionAction { + public: + /// \brief Build a truncate instruction of \p Opnd producing a \p Ty + /// result. + /// trunc Opnd to Ty. + TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { + IRBuilder<> Builder(Opnd); + Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted")); + DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n"); + } + + /// \brief Get the built instruction. + Instruction *getBuiltInstruction() { return Inst; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); + Inst->eraseFromParent(); + } + }; + + /// \brief Build a sign extension instruction. + class SExtBuilder : public TypePromotionAction { + public: + /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty + /// result. + /// sext Opnd to Ty. + SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) + : TypePromotionAction(Inst) { + IRBuilder<> Builder(InsertPt); + Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted")); + DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n"); + } + + /// \brief Get the built instruction. + Instruction *getBuiltInstruction() { return Inst; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); + Inst->eraseFromParent(); + } + }; + + /// \brief Mutate an instruction to another type. + class TypeMutator : public TypePromotionAction { + /// Record the original type. + Type *OrigTy; + + public: + /// \brief Mutate the type of \p Inst into \p NewTy. 
+ TypeMutator(Instruction *Inst, Type *NewTy) + : TypePromotionAction(Inst), OrigTy(Inst->getType()) { + DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy + << "\n"); + Inst->mutateType(NewTy); + } + + /// \brief Mutate the instruction back to its original type. + void undo() override { + DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy + << "\n"); + Inst->mutateType(OrigTy); + } + }; + + /// \brief Replace the uses of an instruction by another instruction. + class UsesReplacer : public TypePromotionAction { + /// Helper structure to keep track of the replaced uses. + struct InstructionAndIdx { + /// The instruction using the instruction. + Instruction *Inst; + /// The index where this instruction is used for Inst. + unsigned Idx; + InstructionAndIdx(Instruction *Inst, unsigned Idx) + : Inst(Inst), Idx(Idx) {} + }; + + /// Keep track of the original uses (pair Instruction, Index). + SmallVector<InstructionAndIdx, 4> OriginalUses; + typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; + + public: + /// \brief Replace all the use of \p Inst by \p New. + UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { + DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New + << "\n"); + // Record the original uses. + for (Use &U : Inst->uses()) { + Instruction *UserI = cast<Instruction>(U.getUser()); + OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); + } + // Now, we can replace the uses. + Inst->replaceAllUsesWith(New); + } + + /// \brief Reassign the original uses of Inst to Inst. + void undo() override { + DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); + for (use_iterator UseIt = OriginalUses.begin(), + EndIt = OriginalUses.end(); + UseIt != EndIt; ++UseIt) { + UseIt->Inst->setOperand(UseIt->Idx, Inst); + } + } + }; + + /// \brief Remove an instruction from the IR. + class InstructionRemover : public TypePromotionAction { + /// Original position of the instruction. + InsertionHandler Inserter; + /// Helper structure to hide all the link to the instruction. In other + /// words, this helps to do as if the instruction was removed. + OperandsHider Hider; + /// Keep track of the uses replaced, if any. + UsesReplacer *Replacer; + + public: + /// \brief Remove all reference of \p Inst and optinally replace all its + /// uses with New. + /// \pre If !Inst->use_empty(), then New != NULL + InstructionRemover(Instruction *Inst, Value *New = NULL) + : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), + Replacer(NULL) { + if (New) + Replacer = new UsesReplacer(Inst, New); + DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); + Inst->removeFromParent(); + } + + ~InstructionRemover() { delete Replacer; } + + /// \brief Really remove the instruction. + void commit() override { delete Inst; } + + /// \brief Resurrect the instruction and reassign it to the proper uses if + /// new value was provided when build this action. + void undo() override { + DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); + Inserter.insert(Inst); + if (Replacer) + Replacer->undo(); + Hider.undo(); + } + }; + +public: + /// Restoration point. + /// The restoration point is a pointer to an action instead of an iterator + /// because the iterator may be invalidated but not the pointer. + typedef const TypePromotionAction *ConstRestorationPt; + /// Advocate every changes made in that transaction. + void commit(); + /// Undo all the changes made after the given point. 
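/// Illustrative use, with hypothetical local names: take a restoration point,
/// apply speculative rewrites through the transaction, then keep or discard
/// them:
///   ConstRestorationPt PT = TPT.getRestorationPoint();
///   // ... register rewrites via TPT.setOperand/mutateType/etc. ...
///   if (!Profitable)
///     TPT.rollback(PT);  // actions are undone in reverse (LIFO) order
///   else
///     TPT.commit();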
+ void rollback(ConstRestorationPt Point); + /// Get the current restoration point. + ConstRestorationPt getRestorationPoint() const; + + /// \name API for IR modification with state keeping to support rollback. + /// @{ + /// Same as Instruction::setOperand. + void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); + /// Same as Instruction::eraseFromParent. + void eraseInstruction(Instruction *Inst, Value *NewVal = NULL); + /// Same as Value::replaceAllUsesWith. + void replaceAllUsesWith(Instruction *Inst, Value *New); + /// Same as Value::mutateType. + void mutateType(Instruction *Inst, Type *NewTy); + /// Same as IRBuilder::createTrunc. + Instruction *createTrunc(Instruction *Opnd, Type *Ty); + /// Same as IRBuilder::createSExt. + Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as Instruction::moveBefore. + void moveBefore(Instruction *Inst, Instruction *Before); + /// @} + + ~TypePromotionTransaction(); + +private: + /// The ordered list of actions made so far. + SmallVector<TypePromotionAction *, 16> Actions; + typedef SmallVectorImpl<TypePromotionAction *>::iterator CommitPt; +}; + +void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, + Value *NewVal) { + Actions.push_back( + new TypePromotionTransaction::OperandSetter(Inst, Idx, NewVal)); +} + +void TypePromotionTransaction::eraseInstruction(Instruction *Inst, + Value *NewVal) { + Actions.push_back( + new TypePromotionTransaction::InstructionRemover(Inst, NewVal)); +} + +void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, + Value *New) { + Actions.push_back(new TypePromotionTransaction::UsesReplacer(Inst, New)); +} + +void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { + Actions.push_back(new TypePromotionTransaction::TypeMutator(Inst, NewTy)); +} + +Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, + Type *Ty) { + TruncBuilder *TB = new TruncBuilder(Opnd, Ty); + Actions.push_back(TB); + return TB->getBuiltInstruction(); +} + +Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, + Value *Opnd, Type *Ty) { + SExtBuilder *SB = new SExtBuilder(Inst, Opnd, Ty); + Actions.push_back(SB); + return SB->getBuiltInstruction(); +} + +void TypePromotionTransaction::moveBefore(Instruction *Inst, + Instruction *Before) { + Actions.push_back( + new TypePromotionTransaction::InstructionMoveBefore(Inst, Before)); +} + +TypePromotionTransaction::ConstRestorationPt +TypePromotionTransaction::getRestorationPoint() const { + return Actions.rbegin() != Actions.rend() ? *Actions.rbegin() : NULL; +} + +void TypePromotionTransaction::commit() { + for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; + ++It) { + (*It)->commit(); + delete *It; + } + Actions.clear(); +} + +void TypePromotionTransaction::rollback( + TypePromotionTransaction::ConstRestorationPt Point) { + while (!Actions.empty() && Point != (*Actions.rbegin())) { + TypePromotionAction *Curr = Actions.pop_back_val(); + Curr->undo(); + delete Curr; + } +} + +TypePromotionTransaction::~TypePromotionTransaction() { + for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It) + delete *It; + Actions.clear(); +} + +/// \brief A helper class for matching addressing modes. +/// +/// This encapsulates the logic for matching the target-legal addressing modes. +class AddressingModeMatcher { + SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetLowering &TLI; + + /// AccessTy/MemoryInst - This is the type for the access (e.g. 
double) and + /// the memory instruction that we're computing this address for. + Type *AccessTy; + Instruction *MemoryInst; + + /// AddrMode - This is the addressing mode that we're building up. This is + /// part of the return value of this addressing mode matching stuff. + ExtAddrMode &AddrMode; + + /// The truncate instruction inserted by other CodeGenPrepare optimizations. + const SetOfInstrs &InsertedTruncs; + /// A map from the instructions to their type before promotion. + InstrToOrigTy &PromotedInsts; + /// The ongoing transaction where every action should be registered. + TypePromotionTransaction &TPT; + + /// IgnoreProfitability - This is set to true when we should not do + /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode + /// always returns true. + bool IgnoreProfitability; + + AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, + const TargetLowering &T, Type *AT, + Instruction *MI, ExtAddrMode &AM, + const SetOfInstrs &InsertedTruncs, + InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT) + : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), + InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { + IgnoreProfitability = false; + } +public: + + /// Match - Find the maximal addressing mode that a load/store of V can fold, + /// give an access type of AccessTy. This returns a list of involved + /// instructions in AddrModeInsts. + /// \p InsertedTruncs The truncate instruction inserted by other + /// CodeGenPrepare + /// optimizations. + /// \p PromotedInsts maps the instructions to their type before promotion. + /// \p The ongoing transaction where every action should be registered. + static ExtAddrMode Match(Value *V, Type *AccessTy, + Instruction *MemoryInst, + SmallVectorImpl<Instruction*> &AddrModeInsts, + const TargetLowering &TLI, + const SetOfInstrs &InsertedTruncs, + InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT) { + ExtAddrMode Result; + + bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + MemoryInst, Result, InsertedTruncs, + PromotedInsts, TPT).MatchAddr(V, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + return Result; + } +private: + bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool MatchAddr(Value *V, unsigned Depth); + bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, + bool *MovedAway = NULL); + bool IsProfitableToFoldIntoAddressingMode(Instruction *I, + ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter); + bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); + bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, + Value *PromotedOperand) const; +}; + +/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. +/// Return true and update AddrMode if this addr mode is legal for the target, +/// false if not. +bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, + unsigned Depth) { + // If Scale is 1, then this is the same as adding ScaleReg to the addressing + // mode. Just process that directly. + if (Scale == 1) + return MatchAddr(ScaleReg, Depth); + + // If the scale is 0, it takes nothing to add this. + if (Scale == 0) + return true; + + // If we already have a scale of this value, we can add to it, otherwise, we + // need an available scale field. 
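+  // Illustrative example (the values here are made up for exposition):
+  // [A + X*4 + Y*2] is not representable because ExtAddrMode carries only one
+  // Scale/ScaledReg slot, so a second, different scaled register has to be
+  // rejected here.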
+  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+    return false;
+
+  ExtAddrMode TestAddrMode = AddrMode;
+
+  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+  // [A+B + A*7] -> [B+A*8].
+  TestAddrMode.Scale += Scale;
+  TestAddrMode.ScaledReg = ScaleReg;
+
+  // If the new address isn't legal, bail out.
+  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+    return false;
+
+  // It was legal, so commit it.
+  AddrMode = TestAddrMode;
+
+  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+  // X*Scale + C*Scale to addr mode.
+  ConstantInt *CI = 0; Value *AddLHS = 0;
+  if (isa<Instruction>(ScaleReg) && // not a constant expr.
+      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+    TestAddrMode.ScaledReg = AddLHS;
+    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+    // If this addressing mode is legal, commit it and remember that we folded
+    // this instruction.
+    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+      AddrMode = TestAddrMode;
+      return true;
+    }
+  }
+
+  // Otherwise, not (x+c)*scale, just return what we have.
+  return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::BitCast:
+    // Don't touch identity bitcasts.
+    if (I->getType() == I->getOperand(0)->getType())
+      return false;
+    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+  case Instruction::PtrToInt:
+    // PtrToInt is always a noop, as we know that the int type is pointer sized.
+    return true;
+  case Instruction::IntToPtr:
+    // We know the input is intptr_t, so this is foldable.
+    return true;
+  case Instruction::Add:
+    return true;
+  case Instruction::Mul:
+  case Instruction::Shl:
+    // Can only handle X*C and X << C.
+    return isa<ConstantInt>(I->getOperand(1));
+  case Instruction::GetElementPtr:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// \brief Helper class to perform type promotion.
+class TypePromotionHelper {
+  /// \brief Utility function to check whether or not a sign extension of
+  /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
+  /// using the operands of \p Inst or promoting \p Inst.
+  /// In other words, check if:
+  /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType.
+  /// #1 Promotion applies:
+  /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
+  /// #2 Operand reuses:
+  /// sext opnd1 to ConsideredSExtType.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType,
+                            const InstrToOrigTy &PromotedInsts);
+
+  /// \brief Utility function to determine if \p OpIdx should be promoted when
+  /// promoting \p Inst.
+  static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
+    if (isa<SelectInst>(Inst) && OpIdx == 0)
+      return false;
+    return true;
+  }
+
+  /// \brief Utility function to promote the operand of \p SExt when this
+  /// operand is a promotable trunc or sext.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  /// \p CreatedInsts[out] contains how many non-free instructions have been
+  /// created to promote the operand of SExt.
+  /// Should never be called directly.
+  /// \return The promoted value which is used instead of SExt.
+  static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
+                                              TypePromotionTransaction &TPT,
+                                              InstrToOrigTy &PromotedInsts,
+                                              unsigned &CreatedInsts);
+
+  /// \brief Utility function to promote the operand of \p SExt when this
+  /// operand is promotable and is not a supported trunc or sext.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  /// \p CreatedInsts[out] contains how many non-free instructions have been
+  /// created to promote the operand of SExt.
+  /// Should never be called directly.
+  /// \return The promoted value which is used instead of SExt.
+  static Value *promoteOperandForOther(Instruction *SExt,
+                                       TypePromotionTransaction &TPT,
+                                       InstrToOrigTy &PromotedInsts,
+                                       unsigned &CreatedInsts);
+
+public:
+  /// Type for the utility function that promotes the operand of SExt.
+  typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
+                           InstrToOrigTy &PromotedInsts,
+                           unsigned &CreatedInsts);
+  /// \brief Given a sign extend instruction \p SExt, return the appropriate
+  /// action to promote the operand of \p SExt instead of using SExt.
+  /// \return NULL if no promotable action is possible with the current
+  /// sign extension.
+  /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
+  /// the other CodeGenPrepare optimizations. This information is important
+  /// because we do not want to promote these instructions as CodeGenPrepare
+  /// will reinsert them later, thus creating an infinite loop: create/remove.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+                          const TargetLowering &TLI,
+                          const InstrToOrigTy &PromotedInsts);
+};
+
+bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
+                                        Type *ConsideredSExtType,
+                                        const InstrToOrigTy &PromotedInsts) {
+  // We can always get through sext.
+  if (isa<SExtInst>(Inst))
+    return true;
+
+  // We can get through a binary operator, if it is legal. In other words, the
+  // binary operator must have a nuw or nsw flag.
+  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
+      (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+    return true;
+
+  // Check if we can do the following simplification.
+  // sext(trunc(sext)) --> sext
+  if (!isa<TruncInst>(Inst))
+    return false;
+
+  Value *OpndVal = Inst->getOperand(0);
+  // Check if we can use this operand in the sext.
+  // If the type is larger than the result type of the sign extension,
+  // we cannot.
+  if (OpndVal->getType()->getIntegerBitWidth() >
+      ConsideredSExtType->getIntegerBitWidth())
+    return false;
+
+  // If the operand of the truncate is not an instruction, we will not have
+  // any information on the dropped bits.
+  // (Actually we could for constants but it is not worth the extra logic).
+  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
+  if (!Opnd)
+    return false;
+
+  // Check if the source of the truncate is narrow enough.
+  // I.e., check that the trunc just drops sign extended bits.
+  // #1 Get the type of the operand.
+  const Type *OpndType;
+  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
+  if (It != PromotedInsts.end())
+    OpndType = It->second;
+  else if (isa<SExtInst>(Opnd))
+    OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
+  else
+    return false;
+
+  // #2 Check that the truncate just drops sign extended bits.
+  if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
+    return true;
+
+  return false;
+}
+
+TypePromotionHelper::Action TypePromotionHelper::getAction(
+    Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+    const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
+  Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
+  Type *SExtTy = SExt->getType();
+  // If the operand of the sign extension is not an instruction, we cannot
+  // get through.
+  // If it is, check whether we can get through.
+  if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
+    return NULL;
+
+  // Do not promote if the operand has been added by codegenprepare.
+  // Otherwise, it means we are undoing an optimization that is likely to be
+  // redone, thus causing a potential infinite loop.
+  if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
+    return NULL;
+
+  // SExt or Trunc instructions.
+  // Return the related handler.
+  if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
+    return promoteOperandForTruncAndSExt;
+
+  // Regular instruction.
+  // Abort early if we will have to insert non-free instructions.
+  if (!SExtOpnd->hasOneUse() &&
+      !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
+    return NULL;
+  return promoteOperandForOther;
+}
+
+Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
+    llvm::Instruction *SExt, TypePromotionTransaction &TPT,
+    InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
+  // By construction, the operand of SExt is an instruction. Otherwise we cannot
+  // get through it and this method should not be called.
+  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+  // Replace sext(trunc(opnd)) or sext(sext(opnd))
+  // => sext(opnd).
+  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+  CreatedInsts = 0;
+
+  // Remove dead code.
+  if (SExtOpnd->use_empty())
+    TPT.eraseInstruction(SExtOpnd);
+
+  // Check if the sext is still needed.
+  if (SExt->getType() != SExt->getOperand(0)->getType())
+    return SExt;
+
+  // At this point we have: sext ty opnd to ty.
+  // Reassign the uses of SExt to the opnd and remove SExt.
+  Value *NextVal = SExt->getOperand(0);
+  TPT.eraseInstruction(SExt, NextVal);
+  return NextVal;
+}
+
+Value *
+TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
+                                            TypePromotionTransaction &TPT,
+                                            InstrToOrigTy &PromotedInsts,
+                                            unsigned &CreatedInsts) {
+  // By construction, the operand of SExt is an instruction. Otherwise we cannot
+  // get through it and this method should not be called.
+  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+  CreatedInsts = 0;
+  if (!SExtOpnd->hasOneUse()) {
+    // SExtOpnd will be promoted.
+    // All its uses, but SExt, will need to use a truncated value of the
+    // promoted version.
+    // Create the truncate now.
+    Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
+    Trunc->removeFromParent();
+    // Insert it just after the definition.
+    Trunc->insertAfter(SExtOpnd);
+
+    TPT.replaceAllUsesWith(SExtOpnd, Trunc);
+    // Restore the operand of SExt (which has been replaced by the previous call
+    // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
+    TPT.setOperand(SExt, 0, SExtOpnd);
+  }
+
+  // Get through the Instruction:
+  // 1. Update its type.
+  // 2. Replace the uses of SExt by Inst.
+  // 3. Sign extend each operand that needs to be sign extended.
+
+  // Remember the original type of the instruction before promotion.
+  // This is useful to know that the high bits are sign extended bits.
+  PromotedInsts.insert(
+      std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
+  // Step #1.
+  TPT.mutateType(SExtOpnd, SExt->getType());
+  // Step #2.
+  TPT.replaceAllUsesWith(SExt, SExtOpnd);
+  // Step #3.
+  Instruction *SExtForOpnd = SExt;
+
+  DEBUG(dbgs() << "Propagate SExt to operands\n");
+  for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+       ++OpIdx) {
+    DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
+    if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
+        !shouldSExtOperand(SExtOpnd, OpIdx)) {
+      DEBUG(dbgs() << "No need to propagate\n");
+      continue;
+    }
+    // Check if we can statically sign extend the operand.
+    Value *Opnd = SExtOpnd->getOperand(OpIdx);
+    if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
+      DEBUG(dbgs() << "Statically sign extend\n");
+      TPT.setOperand(
+          SExtOpnd, OpIdx,
+          ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
+      continue;
+    }
+    // UndefValues are typed, so we have to statically sign extend them.
+    if (isa<UndefValue>(Opnd)) {
+      DEBUG(dbgs() << "Statically sign extend\n");
+      TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
+      continue;
+    }
+
+    // Otherwise we have to explicitly sign extend the operand.
+    // Check if SExt was reused to sign extend an operand.
+    if (!SExtForOpnd) {
+      // If yes, create a new one.
+      DEBUG(dbgs() << "More operands to sext\n");
+      SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+      ++CreatedInsts;
+    }
+
+    TPT.setOperand(SExtForOpnd, 0, Opnd);
+
+    // Move the sign extension before the insertion point.
+    TPT.moveBefore(SExtForOpnd, SExtOpnd);
+    TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
+    // If more sexts are required, new instructions will have to be created.
+    SExtForOpnd = NULL;
+  }
+  if (SExtForOpnd == SExt) {
+    DEBUG(dbgs() << "Sign extension is useless now\n");
+    TPT.eraseInstruction(SExt);
+  }
+  return SExtOpnd;
+}
+
+/// IsPromotionProfitable - Check whether or not promoting an instruction
+/// to a wider type was profitable.
+/// \p MatchedSize gives the number of instructions that have been matched
+/// in the addressing mode after the promotion was applied.
+/// \p SizeWithPromotion gives the number of created instructions for
+/// the promotion plus the number of instructions that have been
+/// matched in the addressing mode before the promotion.
+/// \p PromotedOperand is the value that has been promoted.
+/// \return True if the promotion is profitable, false otherwise.
+bool
+AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
+                                             unsigned SizeWithPromotion,
+                                             Value *PromotedOperand) const {
+  // We folded fewer instructions than we created to promote the operand.
+  // This is not profitable.
+  if (MatchedSize < SizeWithPromotion)
+    return false;
+  if (MatchedSize > SizeWithPromotion)
+    return true;
+  // The promotion is neutral but it may help folding the sign extension in
+  // loads for instance.
+  // Check that we did not create an illegal instruction.
+ Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand); + if (!PromotedInst) + return false; + int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); + // If the ISDOpcode is undefined, it was undefined before the promotion. + if (!ISDOpcode) + return true; + // Otherwise, check if the promoted instruction is legal or not. + return TLI.isOperationLegalOrCustom(ISDOpcode, + EVT::getEVT(PromotedInst->getType())); +} + +/// MatchOperationAddr - Given an instruction or constant expr, see if we can +/// fold the operation into the addressing mode. If so, update the addressing +/// mode and return true, otherwise return false without modifying AddrMode. +/// If \p MovedAway is not NULL, it contains the information of whether or +/// not AddrInst has to be folded into the addressing mode on success. +/// If \p MovedAway == true, \p AddrInst will not be part of the addressing +/// because it has been moved away. +/// Thus AddrInst must not be added in the matched instructions. +/// This state can happen when AddrInst is a sext, since it may be moved away. +/// Therefore, AddrInst may not be valid when MovedAway is true and it must +/// not be referenced anymore. +bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, + unsigned Depth, + bool *MovedAway) { + // Avoid exponential behavior on extremely deep expression trees. + if (Depth >= 5) return false; + + // By default, all matched instructions stay in place. + if (MovedAway) + *MovedAway = false; + + switch (Opcode) { + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return MatchAddr(AddrInst->getOperand(0), Depth); + case Instruction::IntToPtr: + // This inttoptr is a no-op if the integer type is pointer sized. + if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == + TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::BitCast: + // BitCast is always a noop, and we can handle it as long as it is + // int->int or pointer->pointer (we don't want int<->fp or something). + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && + // Don't touch identity bitcasts. These were probably put here by LSR, + // and we don't want to mess around with them. Assume it knows what it + // is doing. + AddrInst->getOperand(0)->getType() != AddrInst->getType()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::Add: { + // Check to see if we can merge in the RHS then the LHS. If so, we win. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + // Start a transaction at this point. + // The LHS may match but not the RHS. + // Therefore, we need a higher level restoration point to undo partially + // matched operation. + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + + if (MatchAddr(AddrInst->getOperand(1), Depth+1) && + MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + + // Restore the old addr mode info. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + + // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. + if (MatchAddr(AddrInst->getOperand(0), Depth+1) && + MatchAddr(AddrInst->getOperand(1), Depth+1)) + return true; + + // Otherwise we definitely can't merge the ADD in. 
+ AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + break; + } + //case Instruction::Or: + // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. + //break; + case Instruction::Mul: + case Instruction::Shl: { + // Can only handle X*C and X << C. + ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); + if (!RHS) return false; + int64_t Scale = RHS->getSExtValue(); + if (Opcode == Instruction::Shl) + Scale = 1LL << Scale; + + return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + } + case Instruction::GetElementPtr: { + // Scan the GEP. We check it if it contains constant offsets and at most + // one variable offset. + int VariableOperand = -1; + unsigned VariableScale = 0; + + int64_t ConstantOffset = 0; + const DataLayout *TD = TLI.getDataLayout(); + gep_type_iterator GTI = gep_type_begin(AddrInst); + for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = + cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); + ConstantOffset += SL->getElementOffset(Idx); + } else { + uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { + ConstantOffset += CI->getSExtValue()*TypeSize; + } else if (TypeSize) { // Scales of zero don't do anything. + // We only allow one variable index at the moment. + if (VariableOperand != -1) + return false; + + // Remember the variable index. + VariableOperand = i; + VariableScale = TypeSize; + } + } + } + + // A common case is for the GEP to only do a constant offset. In this case, + // just add it to the disp field and check validity. + if (VariableOperand == -1) { + AddrMode.BaseOffs += ConstantOffset; + if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ + // Check to see if we can fold the base pointer in too. + if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + } + AddrMode.BaseOffs -= ConstantOffset; + return false; + } + + // Save the valid addressing mode in case we can't match. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // See if the scale and offset amount is valid for this target. + AddrMode.BaseOffs += ConstantOffset; + + // Match the base operand of the GEP. + if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + // If it couldn't be matched, just stuff the value in a register. + if (AddrMode.HasBaseReg) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + } + + // Match the remaining variable portion of the GEP. + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + Depth)) { + // If it couldn't be matched, try stuffing the base into a register + // instead of matching it, and retrying the match of the scale. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + if (AddrMode.HasBaseReg) + return false; + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + AddrMode.BaseOffs += ConstantOffset; + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + VariableScale, Depth)) { + // If even that didn't work, bail. 
+ AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + } + + return true; + } + case Instruction::SExt: { + // Try to move this sext out of the way of the addressing mode. + Instruction *SExt = cast<Instruction>(AddrInst); + // Ask for a method for doing so. + TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( + SExt, InsertedTruncs, TLI, PromotedInsts); + if (!TPH) + return false; + + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + unsigned CreatedInsts = 0; + Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); + // SExt has been moved away. + // Thus either it will be rematched later in the recursive calls or it is + // gone. Anyway, we must not fold it into the addressing mode at this point. + // E.g., + // op = add opnd, 1 + // idx = sext op + // addr = gep base, idx + // is now: + // promotedOpnd = sext opnd <- no match here + // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) + // addr = gep base, op <- match + if (MovedAway) + *MovedAway = true; + + assert(PromotedOperand && + "TypePromotionHelper should have filtered out those cases"); + + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + if (!MatchAddr(PromotedOperand, Depth) || + !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, + PromotedOperand)) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); + TPT.rollback(LastKnownGood); + return false; + } + return true; + } + } + return false; +} + +/// MatchAddr - If we can, try to add the value of 'Addr' into the current +/// addressing mode. If Addr can't be added to AddrMode this returns false and +/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type +/// or intptr_t for the target. +/// +bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { + // Start a transaction at this point that we will rollback if the matching + // fails. + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { + // Fold in immediates if legal for the target. + AddrMode.BaseOffs += CI->getSExtValue(); + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.BaseOffs -= CI->getSExtValue(); + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { + // If this is a global variable, try to fold it into the addressing mode. + if (AddrMode.BaseGV == 0) { + AddrMode.BaseGV = GV; + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.BaseGV = 0; + } + } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // Check to see if it is possible to fold this operation. + bool MovedAway = false; + if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { + // This instruction may have been move away. If so, there is nothing + // to check here. + if (MovedAway) + return true; + // Okay, it's possible to fold this. Check to see if it is actually + // *profitable* to do so. We use a simple cost model to avoid increasing + // register pressure too much. + if (I->hasOneUse() || + IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + AddrModeInsts.push_back(I); + return true; + } + + // It isn't profitable to do this, roll back. 
+ //cerr << "NOT FOLDING: " << *I; + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { + if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) + return true; + TPT.rollback(LastKnownGood); + } else if (isa<ConstantPointerNull>(Addr)) { + // Null pointer gets folded without affecting the addressing mode. + return true; + } + + // Worse case, the target should support [reg] addressing modes. :) + if (!AddrMode.HasBaseReg) { + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = Addr; + // Still check for legality in case the target supports [imm] but not [i+r]. + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.HasBaseReg = false; + AddrMode.BaseReg = 0; + } + + // If the base register is already taken, see if we can do [r+r]. + if (AddrMode.Scale == 0) { + AddrMode.Scale = 1; + AddrMode.ScaledReg = Addr; + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.Scale = 0; + AddrMode.ScaledReg = 0; + } + // Couldn't match. + TPT.rollback(LastKnownGood); + return false; +} + +/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified +/// inline asm call are due to memory operands. If so, return true, otherwise +/// return false. +static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, + const TargetLowering &TLI) { + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // If this asm operand is our Value*, and if it isn't an indirect memory + // operand, we can't fold it! + if (OpInfo.CallOperandVal == OpVal && + (OpInfo.ConstraintType != TargetLowering::C_Memory || + !OpInfo.isIndirect)) + return false; + } + + return true; +} + +/// FindAllMemoryUses - Recursively walk all the uses of I until we find a +/// memory use. If we find an obviously non-foldable instruction, return true. +/// Add the ultimately found memory instructions to MemoryUses. +static bool FindAllMemoryUses(Instruction *I, + SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, + SmallPtrSet<Instruction*, 16> &ConsideredInsts, + const TargetLowering &TLI) { + // If we already considered this instruction, we're done. + if (!ConsideredInsts.insert(I)) + return false; + + // If this is an obviously unfoldable instruction, bail out. + if (!MightBeFoldableInst(I)) + return true; + + // Loop over all the uses, recursively processing them. + for (Use &U : I->uses()) { + Instruction *UserI = cast<Instruction>(U.getUser()); + + if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { + MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { + unsigned opNo = U.getOperandNo(); + if (opNo == 0) return true; // Storing addr, not into addr. + MemoryUses.push_back(std::make_pair(SI, opNo)); + continue; + } + + if (CallInst *CI = dyn_cast<CallInst>(UserI)) { + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); + if (!IA) return true; + + // If this is a memory operand, we're cool, otherwise bail out. 
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + return true; + continue; + } + + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) + return true; + } + + return false; +} + +/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at +/// the use site that we're folding it into. If so, there is no cost to +/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values +/// that we know are live at the instruction already. +bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, + Value *KnownLive2) { + // If Val is either of the known-live values, we know it is live! + if (Val == 0 || Val == KnownLive1 || Val == KnownLive2) + return true; + + // All values other than instructions and arguments (e.g. constants) are live. + if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; + + // If Val is a constant sized alloca in the entry block, it is live, this is + // true because it is just a reference to the stack/frame pointer, which is + // live for the whole function. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) + if (AI->isStaticAlloca()) + return true; + + // Check to see if this value is already used in the memory instruction's + // block. If so, it's already live into the block at the very least, so we + // can reasonably fold it. + return Val->isUsedInBasicBlock(MemoryInst->getParent()); +} + +/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing +/// mode of the machine to fold the specified instruction into a load or store +/// that ultimately uses it. However, the specified instruction has multiple +/// uses. Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: +/// +/// X = ... +/// Y = X+1 +/// use(Y) -> nonload/store +/// Z = Y+1 +/// load Z +/// +/// In this case, Y has multiple uses, and can be folded into the load of Z +/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to +/// be live at the use(Y) line. If we don't fold Y into load Z, we use one +/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the +/// number of computations either. +/// +/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If +/// X was live across 'load Z' for other reasons, we actually *would* want to +/// fold the addressing mode in the Z case. This would make Y die earlier. +bool AddressingModeMatcher:: +IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter) { + if (IgnoreProfitability) return true; + + // AMBefore is the addressing mode before this instruction was folded into it, + // and AMAfter is the addressing mode after the instruction was folded. Get + // the set of registers referenced by AMAfter and subtract out those + // referenced by AMBefore: this is the set of values which folding in this + // address extends the lifetime of. + // + // Note that there are only two potential values being referenced here, + // BaseReg and ScaleReg (global addresses are always available, as are any + // folded immediates). + Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; + + // If the BaseReg or ScaledReg was referenced by the previous addrmode, their + // lifetime wasn't extended by adding this instruction. 
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + BaseReg = 0; + if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + ScaledReg = 0; + + // If folding this instruction (and it's subexprs) didn't extend any live + // ranges, we're ok with it. + if (BaseReg == 0 && ScaledReg == 0) + return true; + + // If all uses of this instruction are ultimately load/store/inlineasm's, + // check to see if their addressing modes will include this instruction. If + // so, we can fold it into all uses, so it doesn't matter if it has multiple + // uses. + SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; + SmallPtrSet<Instruction*, 16> ConsideredInsts; + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + return false; // Has a non-memory, non-foldable use! + + // Now that we know that all uses of this instruction are part of a chain of + // computation involving only operations that could theoretically be folded + // into a memory use, loop over each of these uses and see if they could + // *actually* fold the instruction. + SmallVector<Instruction*, 32> MatchedAddrModeInsts; + for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { + Instruction *User = MemoryUses[i].first; + unsigned OpNo = MemoryUses[i].second; + + // Get the access type of this use. If the use isn't a pointer, we don't + // know what it accesses. + Value *Address = User->getOperand(OpNo); + if (!Address->getType()->isPointerTy()) + return false; + Type *AddressAccessTy = Address->getType()->getPointerElementType(); + + // Do a match against the root of this address, ignoring profitability. This + // will tell us if the addressing mode for the memory operation will + // *actually* cover the shared instruction. + ExtAddrMode Result; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + MemoryInst, Result, InsertedTruncs, + PromotedInsts, TPT); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.MatchAddr(Address, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + + // The match was to check the profitability, the changes made are not + // part of the original matcher. Therefore, they should be dropped + // otherwise the original matcher will not present the right state. + TPT.rollback(LastKnownGood); + + // If the match didn't cover I, then it won't be shared by it. + if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} + +} // end anonymous namespace + +/// IsNonLocalValue - Return true if the specified values are defined in a +/// different basic block than BB. +static bool IsNonLocalValue(Value *V, BasicBlock *BB) { + if (Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() != BB; + return false; +} + +/// OptimizeMemoryInst - Load and Store Instructions often have +/// addressing modes that can do significant amounts of computation. As such, +/// instruction selection will try to get the load or store to do as much +/// computation as possible for the program. The problem is that isel can only +/// see within a single block. As such, we sink as much legal addressing mode +/// stuff into the block as possible. +/// +/// This method is used to optimize both load/store and inline asms with memory +/// operands. 
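+///
+/// Illustrative sketch (the IR below is made up for exposition): given an
+/// address computed in another block,
+///
+///   bb1:
+///     %idx = sext i32 %i to i64
+///     %p   = getelementptr i32* %base, i64 %idx
+///   bb2:
+///     %v   = load i32* %p
+///
+/// the matched address computation is re-materialized next to the load as an
+/// integer expression (ptrtoint/mul/add instructions named "sunkaddr") and
+/// cast back with inttoptr, so instruction selection can fold it into the load.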
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + Type *AccessTy) { + Value *Repl = Addr; + + // Try to collapse single-value PHI nodes. This is necessary to undo + // unprofitable PRE transformations. + SmallVector<Value*, 8> worklist; + SmallPtrSet<Value*, 16> Visited; + worklist.push_back(Addr); + + // Use a worklist to iteratively look through PHI nodes, and ensure that + // the addressing mode obtained from the non-PHI roots of the graph + // are equivalent. + Value *Consensus = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; + SmallVector<Instruction*, 16> AddrModeInsts; + ExtAddrMode AddrMode; + TypePromotionTransaction TPT; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + while (!worklist.empty()) { + Value *V = worklist.back(); + worklist.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(V)) { + Consensus = 0; + break; + } + + // For a PHI node, push all of its incoming values. + if (PHINode *P = dyn_cast<PHINode>(V)) { + for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) + worklist.push_back(P->getIncomingValue(i)); + continue; + } + + // For non-PHIs, determine the addressing mode being computed. + SmallVector<Instruction*, 16> NewAddrModeInsts; + ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( + V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, + PromotedInsts, TPT); + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { + Consensus = V; + NumUsesConsensus = NumUses; + AddrModeInsts = NewAddrModeInsts; + } + continue; + } + + Consensus = 0; + break; + } + + // If the addressing mode couldn't be determined, or if multiple different + // ones were determined, bail out now. + if (!Consensus) { + TPT.rollback(LastKnownGood); + return false; + } + TPT.commit(); + + // Check to see if any of the instructions supersumed by this addr mode are + // non-local to I's BB. + bool AnyNonLocal = false; + for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { + if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { + AnyNonLocal = true; + break; + } + } + + // If all the instructions matched are already in this BB, don't do anything. + if (!AnyNonLocal) { + DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); + return false; + } + + // Insert this computation right after this user. Since our caller is + // scanning from the top of the BB to the bottom, reuse of the expr are + // guaranteed to happen later. + IRBuilder<> Builder(MemoryInst); + + // Now that we determined the addressing expression we want to use and know + // that we have to sink it into this block. 
Check to see if we have already + // done this for some other load/store instr in this block. If so, reuse the + // computation. + Value *&SunkAddr = SunkAddrs[Addr]; + if (SunkAddr) { + DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } else { + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Value *Result = 0; + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType()->isPointerTy()) + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + Result = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (V->getType()->isPointerTy()) { + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); + } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + if (Result != AddrMode.BaseReg) + cast<Instruction>(Result)->eraseFromParent(); + return false; + } + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + // Add in the BaseGV if present. + if (AddrMode.BaseGV) { + Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + if (Result == 0) + SunkAddr = Constant::getNullValue(Addr->getType()); + else + SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); + } + + MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + + // If we have no uses, recursively delete the value and all dead instructions + // using it. + if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. 
+ CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + } + ++NumMemoryInsts; + return true; +} + +/// OptimizeInlineAsmInst - If there are any memory operands, use +/// OptimizeMemoryInst to sink their address computing into the block when +/// possible / profitable. +bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { + bool MadeChange = false; + + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI->ParseConstraints(CS); + unsigned ArgNo = 0; + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI->ComputeConstraintToUse(OpInfo, SDValue()); + + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.isIndirect) { + Value *OpVal = CS->getArgOperand(ArgNo++); + MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType()); + } else if (OpInfo.Type == InlineAsm::isInput) + ArgNo++; + } + + return MadeChange; +} + +/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same +/// basic block as the load, unless conditions are unfavorable. This allows +/// SelectionDAG to fold the extend into the load. +/// +bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { + // Look for a load being extended. + LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); + if (!LI) return false; + + // If they're already in the same block, there's nothing to do. + if (LI->getParent() == I->getParent()) + return false; + + // If the load has other users and the truncate is not free, this probably + // isn't worthwhile. + if (!LI->hasOneUse() && + TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || + !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && + !TLI->isTruncateFree(I->getType(), LI->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(I)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(I) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) + return false; + + // Move the extend into the same block as the load, so that SelectionDAG + // can fold it. + I->removeFromParent(); + I->insertAfter(LI); + ++NumExtsMoved; + return true; +} + +bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { + BasicBlock *DefBB = I->getParent(); + + // If the result of a {s|z}ext and its source are both live out, rewrite all + // other uses of the source with result of extension. + Value *Src = I->getOperand(0); + if (Src->hasOneUse()) + return false; + + // Only do this xform if truncating is free. + if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) + return false; + + // Only safe to perform the optimization if the source is also defined in + // this block. + if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) + return false; + + bool DefIsLiveOut = false; + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = UI->getParent(); + if (UserBB == DefBB) continue; + DefIsLiveOut = true; + break; + } + if (!DefIsLiveOut) + return false; + + // Make sure none of the uses are PHI nodes. + for (User *U : Src->users()) { + Instruction *UI = cast<Instruction>(U); + BasicBlock *UserBB = UI->getParent(); + if (UserBB == DefBB) continue; + // Be conservative. 
We don't want this xform to end up introducing + // reloads just before load / store instructions. + if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) + return false; + } + + // InsertedTruncs - Only insert one trunc in each block once. + DenseMap<BasicBlock*, Instruction*> InsertedTruncs; + + bool MadeChange = false; + for (Use &U : Src->uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = User->getParent(); + if (UserBB == DefBB) continue; + + // Both src and def are live in this block. Rewrite the use. + Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; + + if (!InsertedTrunc) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); + InsertedTruncsSet.insert(InsertedTrunc); + } + + // Replace a use of the {s|z}ext source with a use of the result. + U = InsertedTrunc; + ++NumExtUses; + MadeChange = true; + } + + return MadeChange; +} + +/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be +/// turned into an explicit branch. +static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { + // FIXME: This should use the same heuristics as IfConversion to determine + // whether a select is better represented as a branch. This requires that + // branch probability metadata is preserved for the select, which is not the + // case currently. + + CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + + // If the branch is predicted right, an out of order CPU can avoid blocking on + // the compare. Emit cmovs on compares with a memory operand as branches to + // avoid stalls on the load from memory. If the compare has more than one use + // there's probably another cmov or setcc around so it's not worth emitting a + // branch. + if (!Cmp) + return false; + + Value *CmpOp0 = Cmp->getOperand(0); + Value *CmpOp1 = Cmp->getOperand(1); + + // We check that the memory operand has one use to avoid uses of the loaded + // value directly after the compare, making branches unprofitable. + return Cmp->hasOneUse() && + ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || + (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())); +} + + +/// If we have a SelectInst that will likely profit from branch prediction, +/// turn it into a branch. +bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + + // Can we convert the 'select' to CF ? + if (DisableSelectToBranch || OptSize || !TLI || VectorCond) + return false; + + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + + // Do we have efficient codegen support for this kind of 'selects' ? + if (TLI->isSelectSupported(SelectKind)) { + // We have efficient codegen support for the select instruction. + // Check if it is profitable to keep this 'select'. + if (!TLI->isPredictableSelectExpensive() || + !isFormingBranchFromSelectProfitable(SI)) + return false; + } + + ModifiedDT = true; + + // First, we split the block containing the select into 2 blocks. 
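+  // The rewritten CFG has the following shape (illustrative IR; apart from the
+  // select.mid/select.end block names produced below, the names are made up):
+  //   entry:
+  //     br i1 %cond, label %select.end, label %select.mid
+  //   select.mid:
+  //     br label %select.end
+  //   select.end:
+  //     %res = phi i32 [ %tval, %entry ], [ %fval, %select.mid ]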
+ BasicBlock *StartBlock = SI->getParent(); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); + BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + + // Create a new block serving as the landing pad for the branch. + BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", + NextBlock->getParent(), NextBlock); + + // Move the unconditional branch from the block with the select in it into our + // landing pad block. + StartBlock->getTerminator()->eraseFromParent(); + BranchInst::Create(NextBlock, SmallBlock); + + // Insert the real conditional branch based on the original condition. + BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); + PN->takeName(SI); + PN->addIncoming(SI->getTrueValue(), StartBlock); + PN->addIncoming(SI->getFalseValue(), SmallBlock); + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + + // Instruct OptimizeBlock to skip to the next block. + CurInstIterator = StartBlock->end(); + ++NumSelectsExpanded; + return true; +} + +static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { + SmallVector<int, 16> Mask(SVI->getShuffleMask()); + int SplatElem = -1; + for (unsigned i = 0; i < Mask.size(); ++i) { + if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) + return false; + SplatElem = Mask[i]; + } + + return true; +} + +/// Some targets have expensive vector shifts if the lanes aren't all the same +/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases +/// it's often worth sinking a shufflevector splat down to its use so that +/// codegen can spot all lanes are identical. +bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { + BasicBlock *DefBB = SVI->getParent(); + + // Only do this xform if variable vector shifts are particularly expensive. + if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) + return false; + + // We only expect better codegen by sinking a shuffle if we can recognise a + // constant splat. + if (!isBroadcastShuffle(SVI)) + return false; + + // InsertedShuffles - Only insert a shuffle in each block once. + DenseMap<BasicBlock*, Instruction*> InsertedShuffles; + + bool MadeChange = false; + for (User *U : SVI->users()) { + Instruction *UI = cast<Instruction>(U); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = UI->getParent(); + if (UserBB == DefBB) continue; + + // For now only apply this when the splat is used by a shift instruction. + if (!UI->isShift()) continue; + + // Everything checks out, sink the shuffle if the user's block doesn't + // already have a copy. + Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; + + if (!InsertedShuffle) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), + SVI->getOperand(1), + SVI->getOperand(2), "", InsertPt); + } + + UI->replaceUsesOfWith(SVI, InsertedShuffle); + MadeChange = true; + } + + // If we removed all uses, nuke the shuffle. + if (SVI->use_empty()) { + SVI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +bool CodeGenPrepare::OptimizeInst(Instruction *I) { + if (PHINode *P = dyn_cast<PHINode>(I)) { + // It is possible for very late stage optimizations (such as SimplifyCFG) + // to introduce PHI nodes too late to be cleaned up. If we detect such a + // trivial PHI, go ahead and zap it here. 
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0, + TLInfo, DT)) { + P->replaceAllUsesWith(V); + P->eraseFromParent(); + ++NumPHIsElim; + return true; + } + return false; + } + + if (CastInst *CI = dyn_cast<CastInst>(I)) { + // If the source of the cast is a constant, then this should have + // already been constant folded. The only reason NOT to constant fold + // it is if something (e.g. LSR) was careful to place the constant + // evaluation in a block other than then one that uses it (e.g. to hoist + // the address of globals out of a loop). If this is the case, we don't + // want to forward-subst the cast. + if (isa<Constant>(CI->getOperand(0))) + return false; + + if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) + return true; + + if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { + /// Sink a zext or sext into its user blocks if the target type doesn't + /// fit in one register + if (TLI && TLI->getTypeAction(CI->getContext(), + TLI->getValueType(CI->getType())) == + TargetLowering::TypeExpandInteger) { + return SinkCast(CI); + } else { + bool MadeChange = MoveExtToFormExtLoad(I); + return MadeChange | OptimizeExtUses(I); + } + } + return false; + } + + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (!TLI || !TLI->hasMultipleConditionRegisters()) + return OptimizeCmpExpression(CI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (TLI) + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + return false; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (TLI) + return OptimizeMemoryInst(I, SI->getOperand(1), + SI->getOperand(0)->getType()); + return false; + } + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + if (GEPI->hasAllZeroIndices()) { + /// The GEP operand must be a pointer, so must its result -> BitCast + Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NC); + GEPI->eraseFromParent(); + ++NumGEPsElim; + OptimizeInst(NC); + return true; + } + return false; + } + + if (CallInst *CI = dyn_cast<CallInst>(I)) + return OptimizeCallInst(CI); + + if (SelectInst *SI = dyn_cast<SelectInst>(I)) + return OptimizeSelectInst(SI); + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) + return OptimizeShuffleVectorInst(SVI); + + return false; +} + +// In this pass we look for GEP and cast instructions that are used +// across basic blocks and rewrite them to improve basic-block-at-a-time +// selection. +bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { + SunkAddrs.clear(); + bool MadeChange = false; + + CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) + MadeChange |= OptimizeInst(CurInstIterator++); + + MadeChange |= DupRetToEnableTailCallOpts(&BB); + + return MadeChange; +} + +// llvm.dbg.value is far away from the value then iSel may not be able +// handle it properly. iSel will drop llvm.dbg.value if it can not +// find a node corresponding to the value. 
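+//
+// Illustrative example (made-up IR, for exposition only): given
+//   %v = add i32 %a, %b
+//   ... many unrelated instructions ...
+//   call void @llvm.dbg.value(metadata !{i32 %v}, i64 0, metadata !var)
+// the dbg.value call is moved to sit right after the definition of %v (or after
+// the PHI nodes of %v's block when %v is itself a PHI).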
+bool CodeGenPrepare::PlaceDbgValues(Function &F) { + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + Instruction *PrevNonDbgInst = NULL; + for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { + Instruction *Insn = BI; ++BI; + DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); + if (!DVI) { + PrevNonDbgInst = Insn; + continue; + } + + Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); + if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; + } + } + } + return MadeChange; +} + +// If there is a sequence that branches based on comparing a single bit +// against zero that can be combined into a single instruction, and the +// target supports folding these into a single instruction, sink the +// mask and compare into the branch uses. Do this before OptimizeBlock -> +// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being +// searched for. +bool CodeGenPrepare::sinkAndCmp(Function &F) { + if (!EnableAndCmpSinking) + return false; + if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) + return false; + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Does this BB end with the following? + // %andVal = and %val, #single-bit-set + // %icmpVal = icmp %andResult, 0 + // br i1 %cmpVal label %dest1, label %dest2" + BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Brcc || !Brcc->isConditional()) + continue; + ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); + if (!Cmp || Cmp->getParent() != BB) + continue; + ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); + if (!Zero || !Zero->isZero()) + continue; + Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); + if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) + continue; + ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); + if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) + continue; + DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); + + // Push the "and; icmp" for any users that are conditional branches. + // Since there can only be one branch use per BB, we don't need to keep + // track of which BBs we insert into. + for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); + UI != E; ) { + Use &TheUse = *UI; + // Find brcc use. + BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); + ++UI; + if (!BrccUser || !BrccUser->isConditional()) + continue; + BasicBlock *UserBB = BrccUser->getParent(); + if (UserBB == BB) continue; + DEBUG(dbgs() << "found Brcc use\n"); + + // Sink the "and; icmp" to use. 
+ MadeChange = true; + BinaryOperator *NewAnd = + BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", + BrccUser); + CmpInst *NewCmp = + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, + "", BrccUser); + TheUse = NewCmp; + ++NumAndCmpsMoved; + DEBUG(BrccUser->getParent()->dump()); + } + } + return MadeChange; +} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 18c8e0a..463eb86 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -595,7 +595,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) AntiDepReg = 0; - // Look for a suitable register to use to break the anti-depenence. + // Look for a suitable register to use to break the anti-dependence. // // TODO: Instead of picking the first free register, consider which might // be the best. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 565d20b..1949a48 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -72,7 +72,7 @@ class TargetRegisterInfo; ~CriticalAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. - void StartBlock(MachineBasicBlock *BB); + void StartBlock(MachineBasicBlock *BB) override; /// BreakAntiDependencies - Identifiy anti-dependencies along the critical /// path @@ -82,15 +82,16 @@ class TargetRegisterInfo; MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, - DbgValueVector &DbgValues); + DbgValueVector &DbgValues) override; /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// - void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex); + void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) override; /// Finish - Finish anti-dep breaking for a basic block. - void FinishBlock(); + void FinishBlock() override; private: void PrescanInstruction(MachineInstr *MI); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 6619bcf..5b40ae1 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -108,7 +108,7 @@ public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. 
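The sinkAndCmp pattern above only matches when the and-mask has exactly one bit set (Mask->getUniqueInteger().isPowerOf2()). On a plain 64-bit value the same single-bit test is the usual clear-lowest-bit trick; a standalone sketch, not the APInt call itself:

#include <cstdint>

// True when exactly one bit of Mask is set, i.e. the "#single-bit-set"
// constant in the pattern comment above.
static bool isSingleBitMask(uint64_t Mask) {
  return Mask != 0 && (Mask & (Mask - 1)) == 0;
}

Under that constraint the matched sequence ("and %val, mask; icmp ..., 0; br") really is a single-bit test, which is what lets targets reporting isMaskAndBranchFoldingLegal() fold the sunk and/icmp into the branch.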
- void schedule(); + void schedule() override; }; } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 5efe1ff..aa03e77 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -27,7 +27,7 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted"); namespace { class DeadMachineInstructionElim : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; @@ -84,6 +84,9 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + bool AnyChanges = false; MRI = &MF.getRegInfo(); TRI = MF.getTarget().getRegisterInfo(); @@ -127,17 +130,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - MachineRegisterInfo::use_iterator nextI; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), - E = MRI->use_end(); I!=E; I=nextI) { - nextI = llvm::next(I); // I is invalidated by the setReg - MachineOperand& Use = I.getOperand(); - MachineInstr *UseMI = Use.getParent(); - if (UseMI==MI) - continue; - assert(Use.isDebug()); - UseMI->getOperand(0).setReg(0U); - } + MRI->markUsesInDebugValueAsUndef(Reg); } AnyChanges = true; MI->eraseFromParent(); diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index c7c1752..d543baf 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -15,14 +15,14 @@ #define DEBUG_TYPE "dwarfehprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" -#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -42,16 +42,16 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. 
- DwarfEHPrepare(const TargetMachine *TM) : - FunctionPass(ID), TM(TM), RewindFunction(0) { - initializeDominatorTreePass(*PassRegistry::getPassRegistry()); - } + DwarfEHPrepare(const TargetMachine *TM) + : FunctionPass(ID), TM(TM), RewindFunction(0) { + initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + } - virtual bool runOnFunction(Function &Fn); + bool runOnFunction(Function &Fn) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + void getAnalysisUsage(AnalysisUsage &AU) const override { } - const char *getPassName() const { + const char *getPassName() const override { return "Exception handling preparation"; } }; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 5447df0..f8887ef 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -461,7 +461,7 @@ void SSAIfConv::replacePHIInstrs() { DEBUG(dbgs() << "If-converting " << *PI.PHI); unsigned DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); - DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); PI.PHI = 0; } @@ -482,7 +482,7 @@ void SSAIfConv::rewritePHIOperands() { unsigned PHIDst = PI.PHI->getOperand(0).getReg(); unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); - DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { @@ -590,9 +590,9 @@ class EarlyIfConverter : public MachineFunctionPass { public: static char ID; EarlyIfConverter() : MachineFunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { return "Early If-Conversion"; } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { return "Early If-Conversion"; } private: bool tryConvertIf(MachineBasicBlock*); diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp index 8a1e2d9..e976d7f 100644 --- a/lib/CodeGen/ErlangGC.cpp +++ b/lib/CodeGen/ErlangGC.cpp @@ -32,7 +32,7 @@ namespace { DebugLoc DL) const; public: ErlangGC(); - bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF); + bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override; }; } diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 031f19c..a08eb6b 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -23,7 +23,7 @@ #define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -141,7 +141,7 @@ class ExeDepsFix : public MachineFunctionPass { std::vector<std::pair<MachineInstr*, unsigned> > UndefReads; /// Storage for register unit liveness. - LiveRegUnits LiveUnits; + LivePhysRegs LiveRegSet; /// Current instruction number. /// The first instruction in each basic block is 0. 
@@ -155,14 +155,14 @@ public: ExeDepsFix(const TargetRegisterClass *rc) : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "Execution dependency fix"; } @@ -352,7 +352,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Set up UndefReads to track undefined register reads. UndefReads.clear(); - LiveUnits.clear(); + LiveRegSet.clear(); // Set up LiveRegs to represent registers entering MBB. if (!LiveRegs) @@ -547,21 +547,19 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { return; // Collect this block's live out register units. - LiveUnits.init(TRI); - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - LiveUnits.addLiveIns(*SI, *TRI); - } + LiveRegSet.init(TRI); + LiveRegSet.addLiveOuts(MBB); + MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { - // Update liveness, including the current instrucion's defs. - LiveUnits.stepBackward(*I, *TRI); + // Update liveness, including the current instruction's defs. + LiveRegSet.stepBackward(*I); if (UndefMI == &*I) { - if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) + if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); UndefReads.pop_back(); diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index b2b6882..fb2e446 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -30,9 +30,9 @@ namespace { ExpandISelPseudos() : MachineFunctionPass(ID) {} private: - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 6c73fff..1b0315a 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -35,7 +35,7 @@ public: static char ID; // Pass identification, replacement for typeid ExpandPostRA() : MachineFunctionPass(ID) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); @@ -43,7 +43,7 @@ public: } /// runOnMachineFunction - pass entry point - bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; private: bool LowerSubregToReg(MachineInstr *MI); diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index ef5247c..54b047b 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -32,12 +32,12 @@ namespace { public: explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - - const char *getPassName() const; - void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnFunction(Function &F); - 
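The processUndefReads change above keeps the same bottom-up liveness scan, now on LivePhysRegs: seed the set with the block's live-outs, step backwards over each instruction, and break the partial register dependency only if the register of the undef read is not live at that point. A toy version of that walk, with a made-up Inst type standing in for MachineInstr (assumed semantics: stepping backward erases defs and then adds uses, and an undef read is not listed as a use):

#include <set>
#include <vector>

struct Inst { std::vector<int> Defs, Uses; };

// Walk the block bottom-up from the live-out set; report whether Reg is
// still live once we have stepped backward over the instruction at UndefIdx.
static bool liveAtUndefRead(const std::vector<Inst> &Block, size_t UndefIdx,
                            int Reg, std::set<int> Live /* live-outs */) {
  for (size_t i = Block.size(); i-- > 0;) {
    for (int D : Block[i].Defs) Live.erase(D);   // defs end liveness
    for (int U : Block[i].Uses) Live.insert(U);  // uses begin liveness
    if (i == UndefIdx)
      return Live.count(Reg) != 0;
  }
  return false;
}

When this returns false, no instruction from this point to the end of the block (nor any live-out) needs the register's old value, so the pass is free to clobber it to break the false dependency, which is what breakPartialRegDependency does in the hunk above.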
bool doFinalization(Module &M); + + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; }; } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 1173d11..b31a0f2 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -16,13 +16,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" @@ -53,11 +52,11 @@ namespace { static char ID; LowerIntrinsics(); - const char *getPassName() const; - void getAnalysisUsage(AnalysisUsage &AU) const; + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; - bool doInitialization(Module &M); - bool runOnFunction(Function &F); + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; }; @@ -83,9 +82,9 @@ namespace { static char ID; GCMachineCodeAnalysis(); - void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; } @@ -154,7 +153,7 @@ const char *LowerIntrinsics::getPassName() const { void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { FunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); - AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominatorTreeWrapperPass>(); } /// doInitialization - If this module uses the GC intrinsics, find them now. @@ -271,8 +270,9 @@ bool LowerIntrinsics::runOnFunction(Function &F) { // Custom lowering may modify the CFG, so dominators must be recomputed. 
if (UseCustomLoweringPass) { - if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) - DT->DT->recalculate(F); + if (DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTWP->getDomTree().recalculate(F); } return MadeChange; diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index e2d0eb4..1a18b1a 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -17,13 +17,13 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -162,8 +162,8 @@ namespace { const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; - LiveRegUnits Redefs; - LiveRegUnits DontKill; + LivePhysRegs Redefs; + LivePhysRegs DontKill; bool PreRegAlloc; bool MadeChange; @@ -174,12 +174,12 @@ namespace { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: bool ReverseBranchCondition(BBInfo &BBI); @@ -921,7 +921,7 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF, /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { MachineFunction::iterator PI = BB; - MachineFunction::iterator I = llvm::next(PI); + MachineFunction::iterator I = std::next(PI); MachineFunction::iterator TI = ToBB; MachineFunction::iterator E = BB->getParent()->end(); while (I != TI) { @@ -968,23 +968,22 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are not live/used by MI. -static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, - const TargetRegisterInfo *TRI) { +static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { if (!Ops->isReg() || !Ops->isKill()) continue; unsigned Reg = Ops->getReg(); if (Reg == 0) continue; - Redefs.removeReg(Reg, *TRI); + Redefs.removeReg(Reg); } for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { if (!Ops->isReg() || !Ops->isDef()) continue; unsigned Reg = Ops->getReg(); - if (Reg == 0 || Redefs.contains(Reg, *TRI)) + if (Reg == 0 || Redefs.contains(Reg)) continue; - Redefs.addReg(Reg, *TRI); + Redefs.addReg(Reg); MachineOperand &Op = *Ops; MachineInstr *MI = Op.getParent(); @@ -996,12 +995,11 @@ static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, /** * Remove kill flags from operands with a registers in the @p DontKill set. 
*/ -static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, - const MCRegisterInfo &MCRI) { +static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { for (MIBundleOperands O(&MI); O.isValid(); ++O) { if (!O->isReg() || !O->isKill()) continue; - if (DontKill.contains(O->getReg(), MCRI)) + if (DontKill.contains(O->getReg())) O->setIsKill(false); } } @@ -1012,10 +1010,10 @@ static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, */ static void RemoveKills(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - const LiveRegUnits &DontKill, + const LivePhysRegs &DontKill, const MCRegisterInfo &MCRI) { for ( ; I != E; ++I) - RemoveKills(*I, DontKill, MCRI); + RemoveKills(*I, DontKill); } /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. @@ -1049,13 +1047,13 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB, *TRI); - Redefs.addLiveIns(NextBBI->BB, *TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); // Compute a set of registers which must not be killed by instructions in // BB1: This is everything live-in to BB2. DontKill.init(TRI); - DontKill.addLiveIns(NextBBI->BB, *TRI); + DontKill.addLiveIns(NextBBI->BB); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1104,6 +1102,28 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return true; } +/// Scale down weights to fit into uint32_t. NewTrue is the new weight +/// for successor TrueBB, and NewFalse is the new weight for successor +/// FalseBB. +static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse, + MachineBasicBlock *MBB, + const MachineBasicBlock *TrueBB, + const MachineBasicBlock *FalseBB, + const MachineBranchProbabilityInfo *MBPI) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + SI != SE; ++SI) { + if (*SI == TrueBB) + MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale)); + else if (*SI == FalseBB) + MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale)); + else + MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale); + } +} + /// IfConvertTriangle - If convert a triangle sub-CFG. /// bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { @@ -1154,12 +1174,22 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB, *TRI); - Redefs.addLiveIns(NextBBI->BB, *TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != NULL; + uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; + uint32_t WeightScale = 0; + if (HasEarlyExit) { + // Get weights before modifying CvtBBI->BB and BBI.BB. 
+ CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); + CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB); + BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); + SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); + } if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to @@ -1187,6 +1217,20 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { llvm_unreachable("Unable to reverse branch condition!"); TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); + // Update the edge weight for both CvtBBI->FalseBB and NextBBI. + // New_Weight(BBI.BB, NextBBI->BB) = + // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) + + // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB) + // New_Weight(BBI.BB, CvtBBI->FalseBB) = + // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB) + + uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale; + uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale; + // We need to scale down all weights of BBI.BB to fit uint32_t. + // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to + // the next block. + ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB), + CvtBBI->FalseBB, MBPI); } // Merge in the 'false' block if the 'false' block has no other @@ -1284,7 +1328,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(BBI1->BB, *TRI); + Redefs.addLiveIns(BBI1->BB); // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1317,12 +1361,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, DontKill.init(TRI); for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { - DontKill.stepBackward(*I, *TRI); + DontKill.stepBackward(*I); } for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; ++I) { - Redefs.stepForward(*I, *TRI); + Redefs.stepForward(*I); } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1506,7 +1550,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI); + UpdatePredRedefs(I, Redefs); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1552,11 +1596,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI); + UpdatePredRedefs(MI, Redefs); // Some kill flags may not be correct anymore. 
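ScaleWeights in the hunk above does the arithmetic in 64 bits first and only then squeezes both new edge weights back into uint32_t, dividing by one more than NewMax / UINT32_MAX so the larger weight is guaranteed to fit. A small self-contained version of the same arithmetic (illustrative names, not the MachineBasicBlock API):

#include <cstdint>
#include <iostream>

// Pick a divisor so the larger of the two 64-bit weights fits back into
// uint32_t, then scale both by it, as ScaleWeights above does.
static void scaleToUint32(uint64_t NewTrue, uint64_t NewFalse,
                          uint32_t &TrueW, uint32_t &FalseW) {
  uint64_t NewMax = NewTrue > NewFalse ? NewTrue : NewFalse;
  uint32_t Scale = static_cast<uint32_t>(NewMax / UINT32_MAX) + 1;
  TrueW = static_cast<uint32_t>(NewTrue / Scale);
  FalseW = static_cast<uint32_t>(NewFalse / Scale);
}

int main() {
  uint32_t T, F;
  scaleToUint32(6000000000ULL, 2000000000ULL, T, F);
  std::cout << T << ' ' << F << '\n';   // 3000000000 1000000000: Scale is 2
  return 0;
}

The New_Weight products computed just above are exactly the 64-bit inputs to this scaling, so the rewritten edges keep their relative weights (up to rounding) while still fitting the uint32_t representation that setSuccWeight expects.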
if (!DontKill.empty()) - RemoveKills(*MI, DontKill, *TRI); + RemoveKills(*MI, DontKill); } if (!IgnoreBr) { diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index bb0e642..0f7ba8e 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -21,8 +21,9 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -153,7 +154,7 @@ public: TRI(*mf.getTarget().getRegisterInfo()), MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} - void spill(LiveRangeEdit &); + void spill(LiveRangeEdit &) override; private: bool isSnippet(const LiveInterval &SnipLI); @@ -238,9 +239,10 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { MachineInstr *UseMI = 0; // Check that all uses satisfy our criteria. - for (MachineRegisterInfo::reg_nodbg_iterator - RI = MRI.reg_nodbg_begin(SnipLI.reg); - MachineInstr *MI = RI.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_nodbg_iterator + RI = MRI.reg_instr_nodbg_begin(SnipLI.reg), + E = MRI.reg_instr_nodbg_end(); RI != E; ) { + MachineInstr *MI = &*(RI++); // Allow copies to/from Reg. if (isFullCopyOf(MI, Reg)) @@ -277,8 +279,9 @@ void InlineSpiller::collectRegsToSpill() { if (Original == Reg) return; - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { + MachineInstr *MI = &*(RI++); unsigned SnipReg = isFullCopyOf(MI, Reg); if (!isSibling(SnipReg)) continue; @@ -438,7 +441,20 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, // Also hoist spills to blocks with smaller loop depth, but make sure // that the new value dominates. Non-phi dependents are always // dominated, phis need checking. + + const BranchProbability MarginProb(4, 5); // 80% + // Hoist a spill to outer loop if there are multiple dependents (it + // can be beneficial if more than one dependents are hoisted) or + // if DepSV (the hoisting source) is hotter than SV (the hoisting + // destination) (we add a 80% margin to bias a little towards + // loop depth). + bool HoistCondition = + (MBFI.getBlockFreq(DepSV.SpillMBB) >= + (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) || + Deps->size() > 1; + if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && + HoistCondition && (!DepSVI->first->isPHIDef() || MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { Changed = true; @@ -476,7 +492,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Check if a cached value already exists. 
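The hoisting change in propagateSiblingValue above gates the move on a 4/5 block-frequency margin: a spill may be hoisted when the block it would be hoisted from runs at least 80% as often as the block it would be hoisted to, or when more than one dependent value stands to benefit. Stripped of the fixed-point BlockFrequency/BranchProbability types (plain doubles here, and ignoring the loop-depth and dominance conditions that the real check also requires):

// Same 80% margin as MarginProb(4, 5) above, on plain numbers.
static bool passesHoistMargin(double HoistFromFreq, double HoistToFreq,
                              unsigned NumDependents) {
  return HoistFromFreq >= 0.8 * HoistToFreq || NumDependents > 1;
}

So with frequencies 90 and 100 the hoist is allowed (90 >= 80), while 50 and 100 is rejected unless several dependents are being hoisted together.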
SibValueMap::iterator SVI; bool Inserted; - tie(SVI, Inserted) = + std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI))); if (!Inserted) { DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':' @@ -495,7 +511,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, do { unsigned Reg; VNInfo *VNI; - tie(Reg, VNI) = WorkList.pop_back_val(); + std::tie(Reg, VNI) = WorkList.pop_back_val(); DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def << ":\t"); @@ -554,7 +570,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) { VNInfo *NonPHI = NonPHIs[i]; // Known value? Try an insertion. - tie(SVI, Inserted) = + std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); // Add all the PHIs as dependents of NonPHI. for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi) @@ -587,8 +603,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, << SrcVNI->id << '@' << SrcVNI->def << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); // Known sibling source value? Try an insertion. - tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI, - SibValueInfo(SrcReg, SrcVNI))); + std::tie(SVI, Inserted) = SibValues.insert( + std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI))); // This is the first time we see Src, add it to the worklist. if (Inserted) WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); @@ -745,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { do { LiveInterval *LI; - tie(LI, VNI) = WorkList.pop_back_val(); + std::tie(LI, VNI) = WorkList.pop_back_val(); unsigned Reg = LI->reg; DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); @@ -759,8 +775,10 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); // Find all spills and copies of VNI. 
- for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = UI.skipInstruction();) { + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); + UI != E; ) { + MachineInstr *MI = &*(UI++); if (!MI->isCopy() && !MI->mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); @@ -804,7 +822,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; WorkList.push_back(std::make_pair(LI, VNI)); do { - tie(LI, VNI) = WorkList.pop_back_val(); + std::tie(LI, VNI) = WorkList.pop_back_val(); if (!UsedValues.insert(VNI)) continue; @@ -920,10 +938,12 @@ void InlineSpiller::reMaterializeAll() { for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; LiveInterval &LI = LIS.getInterval(Reg); - for (MachineRegisterInfo::use_nodbg_iterator - RI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = RI.skipBundle();) + for (MachineRegisterInfo::use_bundle_nodbg_iterator + RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end(); + RI != E; ) { + MachineInstr *MI = &*(RI++); anyRemat |= reMaterializeFor(LI, MI); + } } if (!anyRemat) return; @@ -1014,7 +1034,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, char NextLine = '\n'; char SlotIndent = '\t'; - if (llvm::next(B) == E) { + if (std::next(B) == E) { NextLine = ' '; SlotIndent = ' '; } @@ -1098,12 +1118,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, MRI.isReserved(Reg)) { continue; } + // Skip non-Defs, including undef uses and internal reads. + if (MO->isUse()) + continue; MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); - if (MO->readsReg()) { - assert(RI.Reads && "Cannot fold physreg reader"); - continue; - } if (RI.Defines) continue; // FoldMI does not define this physreg. Remove the LI segment. @@ -1172,12 +1191,12 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, MachineBasicBlock &MBB = *MI->getParent(); MachineInstrSpan MIS(MI); - TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot, + TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, MRI.getRegClass(NewVReg), &TRI); - LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end()); + LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); - DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS, + DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, "spill")); ++NumSpills; } @@ -1188,8 +1207,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. - for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg); - MachineInstr *MI = RegI.skipBundle();) { + for (MachineRegisterInfo::reg_bundle_iterator + RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); + RegI != E; ) { + MachineInstr *MI = &*(RegI++); // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { @@ -1314,8 +1335,10 @@ void InlineSpiller::spillAll() { // Finally delete the SnippetCopies. 
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]); - MachineInstr *MI = RI.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end(); + RI != E; ) { + MachineInstr *MI = &*(RI++); assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. LIS.RemoveMachineInstrFromMaps(MI); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 427225d..61d065a 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -22,6 +22,22 @@ using namespace llvm; // Static member used for null interference cursors. InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; +// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a +// buffer of size NumPhysRegs to speed up alloc/clear for targets with large +// reg files). Calloced memory is used for good form, and quites tools like +// Valgrind too, but zero initialized memory is not required by the algorithm: +// this is because PhysRegEntries works like a SparseSet and its entries are +// only valid when there is a corresponding CacheEntries assignment. There is +// also support for when pass managers are reused for targets with different +// numbers of PhysRegs: in this case PhysRegEntries is freed and reinitialized. +void InterferenceCache::reinitPhysRegEntries() { + if (PhysRegEntriesCount == TRI->getNumRegs()) return; + free(PhysRegEntries); + PhysRegEntriesCount = TRI->getNumRegs(); + PhysRegEntries = (unsigned char*) + calloc(PhysRegEntriesCount, sizeof(unsigned char)); +} + void InterferenceCache::init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, @@ -30,7 +46,7 @@ void InterferenceCache::init(MachineFunction *mf, MF = mf; LIUArray = liuarray; TRI = tri; - PhysRegEntries.assign(TRI->getNumRegs(), 0); + reinitPhysRegEntries(); for (unsigned i = 0; i != CacheEntries; ++i) Entries[i].clear(mf, indexes, lis); } @@ -105,7 +121,7 @@ bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, void InterferenceCache::Entry::update(unsigned MBBNum) { SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum); // Use advanceTo only when possible. if (PrevPos != Start) { @@ -182,7 +198,7 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI = &Blocks[MBBNum]; if (BI->Tag == Tag) return; - tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum); } // Check for last interference in block. diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 800f705..d3482d0 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -135,7 +135,8 @@ class InterferenceCache { // Point to an entry for each physreg. The entry pointed to may not be up to // date, and it may have been reused for a different physreg. - SmallVector<unsigned char, 2> PhysRegEntries; + unsigned char* PhysRegEntries; + size_t PhysRegEntriesCount; // Next round-robin entry to be picked. 
unsigned RoundRobin; @@ -147,7 +148,14 @@ class InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {} + InterferenceCache() : TRI(0), LIUArray(0), MF(0), PhysRegEntries(NULL), + PhysRegEntriesCount(0), RoundRobin(0) {} + + ~InterferenceCache() { + free(PhysRegEntries); + } + + void reinitPhysRegEntries(); /// init - Prepare cache for a new function. void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*, diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index c38d4fb..9977c6b 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -13,13 +13,13 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) { Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memset: M.getOrInsertFunction("memset", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt32Ty(M.getContext()), - TD.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), (Type *)0); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -463,7 +463,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - Type *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -474,7 +474,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - Type *IntPtr = TD.getIntPtrType(Context); + Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -486,7 +486,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::memset: { Value *Op0 = CI->getArgOperand(0); - Type *IntPtr = TD.getIntPtrType(Op0->getType()); + Type *IntPtr = DL.getIntPtrType(Op0->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index 81ef1aa..fee0347 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC +required_libraries = Analysis Core MC Scalar Support Target TransformUtils diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index ad2c553..9c2718b 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp 
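reinitPhysRegEntries in the InterferenceCache hunks above swaps the SmallVector for a calloc'd byte-per-physreg table that is reallocated only when the register count changes, which is what happens when a pass manager is reused for a target with a different register file. The shape of that lazy reinit, reduced to a standalone type with hypothetical names:

#include <cstdlib>

// One byte per key, zero-initialized; reallocate only when the key count
// changes, mirroring reinitPhysRegEntries above.
struct ByteTable {
  unsigned char *Data = nullptr;
  unsigned Count = 0;

  void reinit(unsigned NewCount) {
    if (Count == NewCount)
      return;                 // common case: same target, keep the buffer
    free(Data);
    Count = NewCount;
    Data = static_cast<unsigned char *>(calloc(Count, sizeof(unsigned char)));
  }
  ~ByteTable() { free(Data); }
};

As the comment in the hunk notes, the zero fill is only for good form (and to keep tools like Valgrind quiet); correctness does not depend on it, because an entry is trusted only when the cache slot it points to was actually assigned for that register.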
@@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -63,14 +62,23 @@ static bool getVerboseAsm() { } void LLVMTargetMachine::initAsmInfo() { - AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), + TargetTriple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. // Provide the user with a useful error message about what's wrong. - assert(AsmInfo && "MCAsmInfo not initialized. " + assert(TmpAsmInfo && "MCAsmInfo not initialized. " "Make sure you include the correct TargetSelect.h" "and that InitializeAllTargetMCs() is being invoked!"); + + if (Options.DisableIntegratedAS) + TmpAsmInfo->setUseIntegratedAssembler(false); + + if (Options.CompressDebugSections) + TmpAsmInfo->setCompressDebugSections(true); + + AsmInfo = TmpAsmInfo; } LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, @@ -92,6 +100,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + // Add internal analysis passes from the target machine. + TM->addAnalysisPasses(PM); + // Targets may override createPassConfig to provide a target-specific sublass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); PassConfig->setStartStopPasses(StartAfter, StopAfter); @@ -154,7 +165,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // machine-level pass), and whatever other information is needed to // deserialize the code and resume compilation. For now, just write the // LLVM IR. - PM.add(createPrintModulePass(&Out)); + PM.add(createPrintModulePass(Out)); return false; } @@ -165,7 +176,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCRegisterInfo &MRI = *getRegisterInfo(); const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - OwningPtr<MCStreamer> AsmStreamer; + std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { case CGFT_AssemblyFile: { @@ -182,7 +193,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, TargetCPU); MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), - hasMCUseLoc(), hasMCUseCFI(), hasMCUseDwarfDirectory(), InstPrinter, @@ -201,11 +211,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (MCE == 0 || MAB == 0) return true; - AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), - *Context, *MAB, Out, - MCE, hasMCRelaxAll(), - hasMCNoExecStack())); - AsmStreamer.get()->setAutoInitSections(true); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + getTargetTriple(), *Context, *MAB, Out, MCE, STI, hasMCRelaxAll(), + hasMCNoExecStack())); break; } case CGFT_Null: @@ -221,7 +229,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return true; // If successful, createAsmPrinter took ownership of AsmStreamer. 
- AsmStreamer.take(); + AsmStreamer.release(); PM.add(Printer); @@ -275,12 +283,10 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, if (MCE == 0 || MAB == 0) return true; - OwningPtr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx, - *MAB, Out, MCE, - hasMCRelaxAll(), - hasMCNoExecStack())); - AsmStreamer.get()->InitSections(); + std::unique_ptr<MCStreamer> AsmStreamer; + AsmStreamer.reset(getTarget().createMCObjectStreamer( + getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, hasMCRelaxAll(), + hasMCNoExecStack())); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); @@ -288,7 +294,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, return true; // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.take(); + AsmStreamer.release(); PM.add(Printer); diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index deab05a..e88d537 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -119,12 +119,12 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { SUnit *LatencyPriorityQueue::pop() { if (empty()) return NULL; std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; - if (Best != prior(Queue.end())) + if (Best != std::prev(Queue.end())) std::swap(*Best, Queue.back()); Queue.pop_back(); return V; @@ -133,7 +133,7 @@ SUnit *LatencyPriorityQueue::pop() { void LatencyPriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index ffe407a..c22ab11 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -18,19 +18,18 @@ #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; -LexicalScopes::~LexicalScopes() { - releaseMemory(); -} +/// ~LexicalScopes - final cleanup after ourselves. +LexicalScopes::~LexicalScopes() { reset(); } -/// releaseMemory - release memory. -void LexicalScopes::releaseMemory() { +/// reset - Reset the instance so that it's prepared for another function. +void LexicalScopes::reset() { MF = NULL; CurrentFnLexicalScope = NULL; DeleteContainerSeconds(LexicalScopeMap); @@ -41,7 +40,7 @@ void LexicalScopes::releaseMemory() { /// initialize - Scan machine function and constuct lexical scope nest. 
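The streamer handling in the LLVMTargetMachine hunks above keeps the same ownership protocol after the OwningPtr to std::unique_ptr switch: the local smart pointer owns the streamer while setup can still fail early, and release() (the counterpart of OwningPtr::take()) hands the raw pointer over once the printer has taken ownership. A reduced sketch with hypothetical types, not the real MC classes:

#include <memory>

struct Streamer {};
struct Printer {
  explicit Printer(Streamer *S) : Str(S) {}
  ~Printer() { delete Str; }        // the printer owns the streamer it holds
  Streamer *Str;
};

Printer *createPrinter() {
  std::unique_ptr<Streamer> AsmStreamer(new Streamer());
  // Any early return up to here would still destroy the streamer.
  Printer *P = new Printer(AsmStreamer.get());
  AsmStreamer.release();            // ownership has moved to the printer
  return P;
}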
void LexicalScopes::initialize(const MachineFunction &Fn) { - releaseMemory(); + reset(); MF = &Fn; SmallVector<InsnRange, 4> MIRanges; DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap; @@ -54,13 +53,13 @@ void LexicalScopes::initialize(const MachineFunction &Fn) { /// extractLexicalScopes - Extract instruction ranges for each lexical scopes /// for the given machine function. -void LexicalScopes:: -extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges, - DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { +void LexicalScopes::extractLexicalScopes( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { // Scan each instruction and create scopes. First build working set of scopes. - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { const MachineInstr *RangeBeginMI = NULL; const MachineInstr *PrevMI = NULL; DebugLoc PrevDL; @@ -117,14 +116,15 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { MDNode *Scope = NULL; MDNode *IA = NULL; DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); - if (!Scope) return NULL; + if (!Scope) + return NULL; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. DIDescriptor D = DIDescriptor(Scope); if (D.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - + if (IA) return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA)); return LexicalScopeMap.lookup(Scope); @@ -143,7 +143,7 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { // Create an inlined scope for inlined function. return getOrCreateInlinedScope(Scope, InlinedAt); } - + return getOrCreateRegularScope(Scope); } @@ -154,7 +154,7 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { Scope = DILexicalBlockFile(Scope).getScope(); D = DIDescriptor(Scope); } - + LexicalScope *WScope = LexicalScopeMap.lookup(Scope); if (WScope) return WScope; @@ -164,15 +164,15 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false); LexicalScopeMap.insert(std::make_pair(Scope, WScope)); - if (!Parent && DIDescriptor(Scope).isSubprogram() - && DISubprogram(Scope).describes(MF->getFunction())) + if (!Parent && DIDescriptor(Scope).isSubprogram() && + DISubprogram(Scope).describes(MF->getFunction())) CurrentFnLexicalScope = WScope; - + return WScope; } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. 
-LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, +LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, MDNode *InlinedAt) { LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt); if (InlinedScope) @@ -212,7 +212,7 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scope dominance graph!"); + assert(Scope && "Unable to calculate scope dominance graph!"); SmallVector<LexicalScope *, 4> WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; @@ -221,7 +221,8 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); bool visitedChildren = false; for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), - SE = Children.end(); SI != SE; ++SI) { + SE = Children.end(); + SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { WorkStack.push_back(ChildScope); @@ -239,17 +240,17 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { /// assignInstructionRanges - Find ranges of instructions covered by each /// lexical scope. -void LexicalScopes:: -assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, - DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) -{ - +void LexicalScopes::assignInstructionRanges( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { + LexicalScope *PrevLexicalScope = NULL; for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), - RE = MIRanges.end(); RI != RE; ++RI) { + RE = MIRanges.end(); + RI != RE; ++RI) { const InsnRange &R = *RI; LexicalScope *S = MI2ScopeMap.lookup(R.first); - assert (S && "Lost LexicalScope for a machine instruction!"); + assert(S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) PrevLexicalScope->closeInsnRange(S); S->openInsnRange(R.first); @@ -262,26 +263,26 @@ assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges, } /// getMachineBasicBlocks - Populate given set using machine basic blocks which -/// have machine instructions that belong to lexical scope identified by +/// have machine instructions that belong to lexical scope identified by /// DebugLoc. 
-void LexicalScopes:: -getMachineBasicBlocks(DebugLoc DL, - SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) { +void LexicalScopes::getMachineBasicBlocks( + DebugLoc DL, SmallPtrSet<const MachineBasicBlock *, 4> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return; - + if (Scope == CurrentFnLexicalScope) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) MBBs.insert(I); return; } SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), - E = InsnRanges.end(); I != E; ++I) { + E = InsnRanges.end(); + I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); } @@ -299,8 +300,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return true; bool Result = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { DebugLoc IDL = I->getDebugLoc(); if (IDL.isUnknown()) continue; @@ -311,8 +312,6 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return Result; } -void LexicalScope::anchor() { } - /// dump - Print data structures. void LexicalScope::dump(unsigned Indent) const { #ifndef NDEBUG @@ -332,4 +331,3 @@ void LexicalScope::dump(unsigned Indent) const { Children[i]->dump(Indent + 2); #endif } - diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 25645e0..bef4156 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" @@ -72,7 +72,7 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; namespace { -/// UserValueScopes - Keeps track of lexical scopes associated with an +/// UserValueScopes - Keeps track of lexical scopes associated with a /// user value's source location. class UserValueScopes { DebugLoc DL; @@ -480,7 +480,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); + LIS->getInstructionIndex(std::prev(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. do { if (handleDebugValue(MBBI, Idx)) { @@ -568,13 +568,11 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Collect all the (vreg, valno) pairs that are copies of LI. SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(LI->reg), - UE = MRI.use_nodbg_end(); UI != UE; ++UI) { + for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) { + MachineInstr *MI = MO.getParent(); // Copies of the full value. - if (UI.getOperand().getSubReg() || !UI->isCopy()) + if (MO.getSubReg() || !MI->isCopy()) continue; - MachineInstr *MI = &*UI; unsigned DstReg = MI->getOperand(0).getReg(); // Don't follow copies to physregs. 
These are usually setting up call @@ -704,7 +702,6 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool Changed = collectDebugValues(mf); computeIntervals(); DEBUG(print(dbgs())); - LS.releaseMemory(); ModifiedMF = Changed; return Changed; } @@ -915,7 +912,7 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, // Don't insert anything after the first terminator, though. return MI->isTerminator() ? MBB->getFirstTerminator() : - llvm::next(MachineBasicBlock::iterator(MI)); + std::next(MachineBasicBlock::iterator(MI)); } DebugLoc UserValue::findDebugLoc() { diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 58a3f0f..bb67435 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -61,9 +61,9 @@ public: private: - virtual bool runOnMachineFunction(MachineFunction &); - virtual void releaseMemory(); - virtual void getAnalysisUsage(AnalysisUsage &) const; + bool runOnMachineFunction(MachineFunction &) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &) const override; }; diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 2b8feb8..3a7ac11 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -222,13 +222,13 @@ void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { VNInfo *ValNo = I->valno; // Search for the first segment that we can't merge with. - iterator MergeTo = llvm::next(I); + iterator MergeTo = std::next(I); for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } // If NewEnd was in the middle of a segment, make sure to get its endpoint. - I->end = std::max(NewEnd, prior(MergeTo)->end); + I->end = std::max(NewEnd, std::prev(MergeTo)->end); // If the newly formed segment now touches the segment after it and if they // have the same value number, merge the two segments into one segment. @@ -239,7 +239,7 @@ void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { } // Erase any dead segments. - segments.erase(llvm::next(I), MergeTo); + segments.erase(std::next(I), MergeTo); } @@ -274,7 +274,7 @@ LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { MergeTo->end = I->end; } - segments.erase(llvm::next(MergeTo), llvm::next(I)); + segments.erase(std::next(MergeTo), std::next(I)); return MergeTo; } @@ -285,7 +285,7 @@ LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { // If the inserted segment starts in the middle or right at the end of // another segment, just extend that segment to contain the segment of S. if (it != begin()) { - iterator B = prior(it); + iterator B = std::prev(it); if (S.valno == B->valno) { if (B->start <= Start && B->end >= Start) { extendSegmentEndTo(B, End); @@ -389,7 +389,7 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, I->end = Start; // Trim the old segment. // Insert the new one. - segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo)); + segments.insert(std::next(I), Segment(End, OldEnd, ValNo)); } /// removeValNo - Remove all the segments defined by the specified value#. 
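The LiveInterval.cpp hunks above are largely mechanical: llvm::prior/llvm::next give way to the C++11 std::prev/std::next from <iterator>. The merge logic in extendSegmentEndTo is easier to see outside the diff; below is a minimal standalone sketch of that pattern on a plain vector, using a toy Segment type and integer indices rather than the LLVM LiveRange/SlotIndex types.

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>
#include <vector>

struct Segment { int start, end, valno; };   // toy stand-in, not llvm::LiveRange::Segment

// Extend the segment at I to NewEnd, merging any following segments of the
// same value that the extension now overlaps (same shape as extendSegmentEndTo).
void extendSegmentEndTo(std::vector<Segment> &Segs,
                        std::vector<Segment>::iterator I, int NewEnd) {
  int ValNo = I->valno;

  // Search for the first segment we cannot merge with.
  auto MergeTo = std::next(I);
  for (; MergeTo != Segs.end() && NewEnd >= MergeTo->end; ++MergeTo)
    assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");

  // If NewEnd was in the middle of a segment, keep that segment's endpoint.
  I->end = std::max(NewEnd, std::prev(MergeTo)->end);

  // Erase the segments that were swallowed by the extension.
  Segs.erase(std::next(I), MergeTo);
}

int main() {
  std::vector<Segment> Segs = {{0, 4, 1}, {6, 8, 1}, {10, 12, 1}};
  extendSegmentEndTo(Segs, Segs.begin(), 9);
  // Segs is now {0, 9, 1}, {10, 12, 1}: the second segment was merged away.
  std::printf("%zu segments, first ends at %d\n", Segs.size(), Segs[0].end);
  return 0;
}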
@@ -433,7 +433,7 @@ void LiveRange::join(LiveRange &Other, iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) { + for (iterator I = std::next(OutIt), E = end(); I != E; ++I) { VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; assert(nextValNo != 0 && "Huh?"); @@ -641,10 +641,10 @@ void LiveRange::verify() const { assert(I->valno != 0); assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); - if (llvm::next(I) != E) { - assert(I->end <= llvm::next(I)->start); - if (I->end == llvm::next(I)->start) - assert(I->valno != llvm::next(I)->valno); + if (std::next(I) != E) { + assert(I->end <= std::next(I)->start); + if (I->end == std::next(I)->start) + assert(I->valno != std::next(I)->valno); } } } @@ -905,8 +905,8 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], // Rewrite instructions. for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = RI.getOperand(); - MachineInstr *MI = MO.getParent(); + MachineOperand &MO = *RI; + MachineInstr *MI = RI->getParent(); ++RI; // DBG_VALUE instructions don't have slot indexes, so get the index of the // instruction before them. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index e1c3217..fdc673f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -325,8 +326,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallPtrSet<MachineBasicBlock*, 16> LiveOut; // Visit all instructions reading li->reg. - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg); - MachineInstr *UseMI = I.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *UseMI = &*(I++); if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); @@ -458,7 +461,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); SlotIndex MBBStart, MBBEnd; - tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { @@ -485,7 +488,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, MachineBasicBlock *MBB = *I; // Check if VNI is live in to MBB. - tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); LiveQueryResult LRQ = LI->Query(MBBStart); if (LRQ.valueIn() != VNI) { // This block isn't part of the VNI segment. Prune the search. 
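The pruneValue hunks swap llvm::tie for std::tie when unpacking the (start, end) pair returned by getMBBRange. The pattern is plain C++11; here is a small self-contained illustration in which a toy getRange function stands in for SlotIndexes::getMBBRange and the indices are ordinary unsigned values.

#include <cstdio>
#include <tuple>
#include <utility>

// Toy stand-in for SlotIndexes::getMBBRange(): the [start, end) index range
// covered by a block.
static std::pair<unsigned, unsigned> getRange(unsigned BlockNo) {
  return {BlockNo * 16, BlockNo * 16 + 16};
}

int main() {
  unsigned Start, End;
  // std::tie unpacks the pair into the two locals, mirroring
  // std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB) in the diff.
  std::tie(Start, End) = getRange(3);
  std::printf("block 3 covers [%u, %u)\n", Start, End);
  return 0;
}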
@@ -620,9 +623,12 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { - const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); - return (isDef + isUse) * (freq.getFrequency() * Scale); +LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineInstr *MI) { + BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent()); + const float Scale = 1.0f / MBFI->getEntryFreq(); + return (isDef + isUse) * (Freq.getFrequency() * Scale); } LiveRange::Segment @@ -870,8 +876,8 @@ private: // values. The new range should be placed immediately before NewI, move any // intermediate ranges up. assert(NewI != I && "Inconsistent iterators"); - std::copy(llvm::next(I), NewI, I); - *llvm::prior(NewI) + std::copy(std::next(I), NewI, I); + *std::prev(NewI) = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } @@ -916,7 +922,7 @@ private: if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. - llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot(); + std::prev(I)->end = findLastUseBefore(Reg).getRegSlot(); return; } } @@ -952,7 +958,7 @@ private: // DefVNI is a dead def. It may have been moved across other values in LR, // so move I up to NewI. Slide [NewI;I) down one position. - std::copy_backward(NewI, I, llvm::next(I)); + std::copy_backward(NewI, I, std::next(I)); *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } @@ -964,11 +970,11 @@ private: "No RegMask at OldIdx."); *RI = NewIdx.getRegSlot(); assert((RI == LIS.RegMaskSlots.begin() || - SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) && - "Cannot move regmask instruction above another call"); - assert((llvm::next(RI) == LIS.RegMaskSlots.end() || - SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) && - "Cannot move regmask instruction below another call"); + SlotIndex::isEarlierInstr(*std::prev(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((std::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *std::next(RI))) && + "Cannot move regmask instruction below another call"); } // Return the last use of reg between NewIdx and OldIdx. @@ -976,10 +982,10 @@ private: if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI.use_nodbg_begin(Reg), - UE = MRI.use_nodbg_end(); - UI != UE; UI.skipInstruction()) { + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI.use_instr_nodbg_begin(Reg), + UE = MRI.use_instr_nodbg_end(); + UI != UE; ++UI) { const MachineInstr* MI = &*UI; SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); if (InstSlot > LastUse && InstSlot < OldIdx) @@ -1121,7 +1127,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (LII->end.isDead()) { SlotIndex prevStart; if (LII != LI.begin()) - prevStart = llvm::prior(LII)->start; + prevStart = std::prev(LII)->start; // FIXME: This could be more efficient if there was a // removeSegment method that returned an iterator. 
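getSpillWeight now receives the MachineBlockFrequencyInfo and the instruction instead of a raw BlockFrequency, but the arithmetic stays the same: the block's frequency is normalized against the entry block's frequency and scaled by how many operands touch the register. A standalone sketch of that computation, with plain integers standing in for BlockFrequency:

#include <cstdint>
#include <cstdio>

// Toy version of the spill-weight formula: normalize the block frequency
// against the function entry frequency so weights are comparable across
// functions, then scale by the number of reads/writes of the register.
static float spillWeight(bool IsDef, bool IsUse,
                         uint64_t BlockFreq, uint64_t EntryFreq) {
  const float Scale = 1.0f / EntryFreq;
  return (IsDef + IsUse) * (BlockFreq * Scale);
}

int main() {
  // A use in a block that is eight times hotter than the entry block.
  std::printf("weight = %f\n", spillWeight(false, true, 8000, 1000));
  return 0;
}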
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp new file mode 100644 index 0000000..7efd941 --- /dev/null +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -0,0 +1,114 @@ +//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LivePhysRegs utility for tracking liveness of +// physical registers across machine instructions in forward or backward order. +// A more detailed description can be found in the corresponding header file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + + +/// \brief Remove all registers from the set that get clobbered by the register +/// mask. +void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) { + SparseSet<unsigned>::iterator LRI = LiveRegs.begin(); + while (LRI != LiveRegs.end()) { + if (MO.clobbersPhysReg(*LRI)) + LRI = LiveRegs.erase(LRI); + else + ++LRI; + } +} + +/// Simulates liveness when stepping backwards over an instruction(bundle): +/// Remove Defs, add uses. This is the recommended way of calculating liveness. +void LivePhysRegs::stepBackward(const MachineInstr &MI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg); + } else if (O->isRegMask()) + removeRegsInMask(*O); + } + + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg); + } +} + +/// Simulates liveness when stepping forward over an instruction(bundle): Remove +/// killed-uses, add defs. This is the not recommended way, because it depends +/// on accurate kill flags. If possible use stepBackwards() instead of this +/// function. +void LivePhysRegs::stepForward(const MachineInstr &MI) { + SmallVector<unsigned, 4> Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg); + } + } else if (O->isRegMask()) + removeRegsInMask(*O); + } + + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) + addReg(Defs[i]); +} + +/// Prin the currently live registers to OS. +void LivePhysRegs::print(raw_ostream &OS) const { + OS << "Live Registers:"; + if (!TRI) { + OS << " (uninitialized)\n"; + return; + } + + if (empty()) { + OS << " (empty)\n"; + return; + } + + for (const_iterator I = begin(), E = end(); I != E; ++I) + OS << " " << PrintReg(*I, TRI); + OS << "\n"; +} + +/// Dumps the currently live registers to the debug output. 
+void LivePhysRegs::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " " << *this; +#endif +} diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index ae086bc..ecd75b4 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -41,9 +41,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { // Visit all def operands. If the same instruction has multiple defs of Reg, // LR.createDeadDef() will deduplicate. - for (MachineRegisterInfo::def_iterator - I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { - const MachineInstr *MI = &*I; + for (MachineOperand &MO : MRI->def_operands(Reg)) { + const MachineInstr *MI = MO.getParent(); // Find the corresponding slot index. SlotIndex Idx; if (MI->isPHI()) @@ -52,7 +51,7 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { else // Instructions are either normal 'r', or early clobber 'e'. Idx = Indexes->getInstructionIndex(MI) - .getRegSlot(I.getOperand().isEarlyClobber()); + .getRegSlot(MO.isEarlyClobber()); // Create the def in LR. This may find an existing def. LR.createDeadDef(Idx, *Alloc); @@ -64,9 +63,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all operands that read Reg. This may include partial defs. - for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), - E = MRI->reg_nodbg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { // Clear all kill flags. They will be reinserted after register allocation // by LiveIntervalAnalysis::addKillFlags(). if (MO.isUse()) @@ -75,7 +72,8 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { continue; // MI is reading Reg. We may have visited MI before if it happens to be // reading Reg multiple times. That is OK, extend() is idempotent. - const MachineInstr *MI = &*I; + const MachineInstr *MI = MO.getParent(); + unsigned OpNo = (&MO - &MI->getOperand(0)); // Find the SlotIndex being read. SlotIndex Idx; @@ -83,7 +81,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(!MO.isDef() && "Cannot handle PHI def of partial register."); // PHI operands are paired: (Reg, PredMBB). // Extend the live range to be live-out from PredMBB. - Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB()); + Idx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB()); } else { // This is a normal instruction. Idx = Indexes->getInstructionIndex(MI).getRegSlot(); @@ -92,7 +90,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { if (MO.isDef()) { if (MO.isEarlyClobber()) Idx = Idx.getRegSlot(true); - } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) { + } else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { // FIXME: This would be a lot easier if tied early-clobber uses also // had an early-clobber flag. if (MI->getOperand(DefIdx).isEarlyClobber()) @@ -114,7 +112,7 @@ void LiveRangeCalc::updateLiveIns() { MachineBasicBlock *MBB = I->DomNode->getBlock(); assert(I->Value && "No live-in value found"); SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(MBB); + std::tie(Start, End) = Indexes->getMBBRange(MBB); if (I->Kill.isValid()) // Value is killed inside this block. 
@@ -212,7 +210,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, } SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(Pred); + std::tie(Start, End) = Indexes->getMBBRange(Pred); // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. @@ -247,7 +245,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(*I); + std::tie(Start, End) = Indexes->getMBBRange(*I); // Trim the live range in KillMBB. if (*I == KillMBBNum && Kill.isValid()) End = Kill; @@ -342,7 +340,7 @@ void LiveRangeCalc::updateSSA() { ++Changes; assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; - tie(Start, End) = Indexes->getMBBRange(MBB); + std::tie(Start, End) = Indexes->getMBBRange(MBB); LiveRange &LR = I->LR; VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index cb70c43..891eaab 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -167,9 +167,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, MachineInstr *DefMI = 0, *UseMI = 0; // Check that there is a single def and a single use. - for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg), - E = MRI.reg_nodbg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { MachineInstr *MI = MO.getParent(); if (MO.isDef()) { if (DefMI && DefMI != MI) diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 1d801ac..7f797be 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -65,7 +65,9 @@ bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { void LiveRegMatrix::releaseMemory() { for (unsigned i = 0, e = Matrix.size(); i != e; ++i) { Matrix[i].clear(); - Queries[i].clear(); + // No need to clear Queries here, since LiveIntervalUnion::Query doesn't + // have anything important to clear and LiveRegMatrix's runOnFunction() + // does a std::unique_ptr::reset anyways. } } diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp deleted file mode 100644 index 6221ca2..0000000 --- a/lib/CodeGen/LiveRegUnits.cpp +++ /dev/null @@ -1,111 +0,0 @@ -//===-- LiveInterval.cpp - Live Interval Representation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the LiveRegUnits utility for tracking liveness of -// physical register units across machine instructions in forward or backward -// order. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/LiveRegUnits.h" -#include "llvm/CodeGen/MachineInstrBundle.h" -using namespace llvm; - -/// Return true if the given MachineOperand clobbers the given register unit. -/// A register unit is only clobbered if all its super-registers are clobbered. 
-static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit, - const MCRegisterInfo *MCRI) { - for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) { - if (!MO->clobbersPhysReg(*SI)) - return false; - } - } - return true; -} - -/// We assume the high bits of a physical super register are not preserved -/// unless the instruction has an implicit-use operand reading the -/// super-register or a register unit for the upper bits is available. -void LiveRegUnits::removeRegsInMask(const MachineOperand &Op, - const MCRegisterInfo &MCRI) { - SparseSet<unsigned>::iterator LUI = LiveUnits.begin(); - while (LUI != LiveUnits.end()) { - if (operClobbersUnit(&Op, *LUI, &MCRI)) - LUI = LiveUnits.erase(LUI); - else - ++LUI; - } -} - -void LiveRegUnits::stepBackward(const MachineInstr &MI, - const MCRegisterInfo &MCRI) { - // Remove defined registers and regmask kills from the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (O->isReg()) { - if (!O->isDef()) - continue; - unsigned Reg = O->getReg(); - if (Reg == 0) - continue; - removeReg(Reg, MCRI); - } else if (O->isRegMask()) { - removeRegsInMask(*O, MCRI); - } - } - // Add uses to the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isUndef()) - continue; - unsigned Reg = O->getReg(); - if (Reg == 0) - continue; - addReg(Reg, MCRI); - } -} - -/// Uses with kill flag get removed from the set, defs added. If possible -/// use StepBackward() instead of this function because some kill flags may -/// be missing. -void LiveRegUnits::stepForward(const MachineInstr &MI, - const MCRegisterInfo &MCRI) { - SmallVector<unsigned, 4> Defs; - // Remove killed registers from the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (O->isReg()) { - unsigned Reg = O->getReg(); - if (Reg == 0) - continue; - if (O->isDef()) { - if (!O->isDead()) - Defs.push_back(Reg); - } else { - if (!O->isKill()) - continue; - assert(O->isUse()); - removeReg(Reg, MCRI); - } - } else if (O->isRegMask()) { - removeRegsInMask(*O, MCRI); - } - } - // Add defs to the set. - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - addReg(Defs[i], MCRI); - } -} - -/// Adds all registers in the live-in list of block @p BB. 
-void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB, - const MCRegisterInfo &MCRI) { - for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(), - LE = MBB->livein_end(); L != LE; ++L) { - addReg(*L, MCRI); - } -} diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 26a1176..122d467 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -17,12 +17,14 @@ #define DEBUG_TYPE "localstackalloc" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -60,18 +62,27 @@ namespace { class LocalStackSlotPass: public MachineFunctionPass { SmallVector<int64_t,16> LocalOffsets; + /// StackObjSet - A set of stack object indexes + typedef SmallSetVector<int, 8> StackObjSet; void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); + void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); public: static char ID; // Pass identification, replacement for typeid - explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } - bool runOnMachineFunction(MachineFunction &MF); + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { + initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -81,8 +92,12 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; -INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", - "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) + bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -145,6 +160,22 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, ++NumAllocations; } +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. 
+void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + ProtectedObjs.insert(i); + } +} + /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. /// @@ -156,11 +187,16 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; unsigned MaxAlign = 0; + StackProtector *SP = &getAnalysis<StackProtector>(); // Make sure that the stack protector comes before the local variables on the // stack. - SmallSet<int, 16> LargeStackObjs; + SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, StackGrowsDown, MaxAlign); @@ -170,12 +206,29 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (!MFI->MayNeedStackProtector(i)) - continue; - AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); - LargeStackObjs.insert(i); + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); } // Then assign frame offsets to stack objects that are not used to spill @@ -185,7 +238,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (LargeStackObjs.count(i)) + if (ProtectedObjs.count(i)) continue; AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); @@ -233,9 +286,11 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; - // Debug value instructions can't be out of range, so they don't need - // any updates. - if (MI->isDebugValue()) + // Debug value, stackmap and patchpoint instructions can't be out of + // range, so they don't need any updates. + if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) continue; // For now, allocate the base register(s) within the basic block @@ -322,18 +377,11 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // processed all FrameRefs before this one, just check whether or not // the next FrameRef will be able to reuse this new register. If not, // then don't bother creating it. 
- bool CanReuse = false; - for (int refn = ref + 1; refn < e; ++refn) { - FrameRef &FRN = FrameReferenceInsns[refn]; - MachineBasicBlock::iterator J = FRN.getMachineInstr(); - MachineInstr *MIN = J; - - CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, - FRN.getLocalOffset(), MIN, TRI); - break; - } - - if (!CanReuse) { + if (ref + 1 >= e || + !lookupCandidateBaseReg( + BaseOffset, FrameSizeAdjust, + FrameReferenceInsns[ref + 1].getLocalOffset(), + FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; continue; } @@ -363,7 +411,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Modify the instruction to use the new base register rather // than the frame index operand. - TRI->resolveFrameIndex(I, BaseReg, Offset); + TRI->resolveFrameIndex(*I, BaseReg, Offset); DEBUG(dbgs() << "Resolved: " << *MI); ++NumReplacements; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ca71e3b..888c20e 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" @@ -25,10 +24,10 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LeakDetector.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -52,7 +51,8 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); + const TargetMachine &TM = MF->getTarget(); + const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -160,7 +160,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { iterator E = end(); - while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue())) + while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. 
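The LocalStackSlotAllocation change above folds the old one-iteration "CanReuse" loop into a direct check of the next frame reference: either there is no next reference, or the candidate base register must cover it. The control flow is easier to see in isolation; this is a toy sketch with an invented FrameRef struct and a stand-in predicate in place of lookupCandidateBaseReg, and the 8-bit displacement limit is purely illustrative.

#include <cstdio>
#include <vector>

struct FrameRef { long LocalOffset; };  // toy stand-in

// Stand-in for lookupCandidateBaseReg: can the next reference be reached
// from the base register we are about to create?
static bool candidateFits(long BaseOffset, long NextLocalOffset) {
  return NextLocalOffset - BaseOffset < 256;  // pretend 8-bit displacement
}

int main() {
  std::vector<FrameRef> Refs = {{0}, {64}, {1024}};
  long BaseOffset = 0;
  for (std::size_t ref = 0, e = Refs.size(); ref != e; ++ref) {
    // The old code looped over later refs but broke after one iteration;
    // the rewrite checks exactly the next ref, or gives up at the end.
    bool CanReuse = ref + 1 < e &&
                    candidateFits(BaseOffset, Refs[ref + 1].LocalOffset);
    std::printf("ref %zu: the next ref %s reuse a base register here\n",
                ref, CanReuse ? "could" : "could not");
  }
  return 0;
}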
@@ -277,7 +277,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const char *Comma = ""; if (const BasicBlock *LBB = getBasicBlock()) { OS << Comma << "derived from LLVM BB "; - WriteAsOperand(OS, LBB, /*PrintType=*/false); + LBB->printAsOperand(OS, /*PrintType=*/false); Comma = ", "; } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } @@ -330,6 +330,10 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } } +void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) { + OS << "BB#" << getNumber(); +} + void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector<unsigned>::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); @@ -622,7 +626,7 @@ bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { MachineFunction::const_iterator I(this); - return llvm::next(I) == MachineFunction::const_iterator(MBB); + return std::next(I) == MachineFunction::const_iterator(MBB); } bool MachineBasicBlock::canFallThrough() { @@ -677,6 +681,11 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere + // Performance might be harmed on HW that implements branching using exec mask + // where both sides of the branches are always executed. + if (MF->getTarget().requiresStructuredCFG()) + return NULL; + // We may need to update this's terminator, but we can't do that if // AnalyzeBranch fails. If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); @@ -696,7 +705,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + MF->insert(std::next(MachineFunction::iterator(this)), NMBB); DEBUG(dbgs() << "Splitting critical edge:" " BB#" << getNumber() << " -- BB#" << NMBB->getNumber() @@ -839,7 +848,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // extend to the end of the new split block. bool isLastMBB = - llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end(); + std::next(MachineFunction::iterator(NMBB)) == getParent()->end(); SlotIndex StartIndex = Indexes->getMBBEndIdx(this); SlotIndex PrevIndex = StartIndex.getPrevSlot(); @@ -1054,7 +1063,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, bool Changed = false; MachineFunction::iterator FallThru = - llvm::next(MachineFunction::iterator(this)); + std::next(MachineFunction::iterator(this)); if (DestA == 0 && DestB == 0) { // Block falls through to successor. @@ -1114,6 +1123,13 @@ uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { return *getWeightIterator(Succ); } +/// Set successor weight of a given iterator. +void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) { + if (Weights.empty()) + return; + *getWeightIterator(I) = weight; +} + /// getWeightIterator - Return wight iterator corresonding to the I successor /// iterator MachineBasicBlock::weight_iterator MachineBasicBlock:: @@ -1210,9 +1226,3 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // At this point we have no idea of the liveness of the register. 
return LQR_Unknown; } - -void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, - bool t) { - OS << "BB#" << MBB->getNumber(); -} - diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index e269d24..13203d5 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====// +//====------ MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis ------====// // // The LLVM Compiler Infrastructure // @@ -16,9 +16,99 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" using namespace llvm; +#ifndef NDEBUG +enum GVDAGType { + GVDT_None, + GVDT_Fraction, + GVDT_Integer +}; + +static cl::opt<GVDAGType> +ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags", + cl::Hidden, + cl::desc("Pop up a window to show a dag displaying how machine block " + "frequencies propagate through the CFG."), + cl::values( + clEnumValN(GVDT_None, "none", + "do not display graphs."), + clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " + "fractional block frequency representation."), + clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " + "integer fractional block frequency representation."), + clEnumValEnd)); + +namespace llvm { + +template <> +struct GraphTraits<MachineBlockFrequencyInfo *> { + typedef const MachineBasicBlock NodeType; + typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + typedef MachineFunction::const_iterator nodes_iterator; + + static inline + const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static ChildIteratorType child_begin(const NodeType *N) { + return N->succ_begin(); + } + + static ChildIteratorType child_end(const NodeType *N) { + return N->succ_end(); + } + + static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->end(); + } +}; + +template<> +struct DOTGraphTraits<MachineBlockFrequencyInfo*> : + public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->getName(); + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineBlockFrequencyInfo *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << ":"; + switch (ViewMachineBlockFreqPropagationDAG) { + case GVDT_Fraction: + Graph->printBlockFreq(OS, Node); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + + return Result; + } +}; + + +} // end namespace llvm +#endif + INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) @@ -28,15 +118,12 @@ INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", char MachineBlockFrequencyInfo::ID = 0; 
-MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) { +MachineBlockFrequencyInfo:: +MachineBlockFrequencyInfo() :MachineFunctionPass(ID) { initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); - MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction, - MachineBranchProbabilityInfo>(); } -MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() { - delete MBFI; -} +MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {} void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); @@ -45,12 +132,55 @@ void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { - MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>(); + MachineBranchProbabilityInfo &MBPI = + getAnalysis<MachineBranchProbabilityInfo>(); + if (!MBFI) + MBFI.reset(new ImplType); MBFI->doFunction(&F, &MBPI); +#ifndef NDEBUG + if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { + view(); + } +#endif return false; } +void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); } + +/// Pop up a ghostview window with the current block frequency propagation +/// rendered using dot. +void MachineBlockFrequencyInfo::view() const { +// This code is only for debugging. +#ifndef NDEBUG + ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), + "MachineBlockFrequencyDAGs"); +#else + errs() << "MachineBlockFrequencyInfo::view is only available in debug builds " + "on systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { - return MBFI->getBlockFreq(MBB); + return MBFI ? MBFI->getBlockFreq(MBB) : 0; +} + +const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { + return MBFI ? MBFI->Fn : nullptr; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const { + return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const { + return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS; +} + +uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { + return MBFI ? MBFI->getEntryFreq() : 0; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 4b0f7f3..771e7ce 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -58,6 +58,13 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt<unsigned> +ExitBlockBias("block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -145,7 +152,7 @@ public: #ifndef NDEBUG /// \brief Dump the blocks in this chain. 
- void dump() LLVM_ATTRIBUTE_USED { + LLVM_DUMP_METHOD void dump() { for (iterator I = begin(), E = end(); I != E; ++I) (*I)->dump(); } @@ -230,9 +237,9 @@ public: initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineLoopInfo>(); @@ -360,7 +367,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); continue; } @@ -383,8 +391,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) - << " -> non-cold CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); continue; } } @@ -401,23 +409,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } -namespace { -/// \brief Predicate struct to detect blocks already placed. -class IsBlockPlaced { - const BlockChain &PlacedChain; - const BlockToChainMapType &BlockToChain; - -public: - IsBlockPlaced(const BlockChain &PlacedChain, - const BlockToChainMapType &BlockToChain) - : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} - - bool operator()(MachineBasicBlock *BB) const { - return BlockToChain.lookup(BB) == &PlacedChain; - } -}; -} - /// \brief Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic @@ -436,7 +427,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - IsBlockPlaced(Chain, BlockToChain)), + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); MachineBasicBlock *BestBlock = 0; @@ -453,8 +446,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; BestBlock = *WBI; @@ -501,11 +494,11 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); for (;;) { assert(BB); assert(BlockToChain[BB] == &Chain); - assert(*llvm::prior(Chain.end()) == BB); + assert(*std::prev(Chain.end()) == BB); // Look for the best viable successor if there is one to place immediately // after this block. 
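MachineBlockPlacement drops the hand-written IsBlockPlaced predicate class in favor of a capturing lambda inside the erase/remove_if idiom. The same shape in a self-contained form, with plain integers standing in for blocks and chains:

#include <algorithm>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main() {
  // Toy stand-ins: block ids mapped to the chain they currently belong to.
  std::unordered_map<int, int> BlockToChain = {{1, 7}, {2, 9}, {3, 7}};
  const int Chain = 7;
  std::vector<int> WorkList = {1, 2, 3};

  // Drop blocks already placed in the current chain, the same pattern as the
  // lambda-based remove_if in selectBestCandidateBlock.
  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
                                [&](int BB) {
                                  return BlockToChain[BB] == Chain;
                                }),
                 WorkList.end());

  for (int BB : WorkList)
    std::printf("still a candidate: block %d\n", BB);
  return 0;
}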
@@ -536,7 +529,7 @@ void MachineBlockPlacement::buildChain( << " to " << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); } DEBUG(dbgs() << "Finished forming chain for header block " @@ -575,8 +568,8 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " - << Pred->succ_size() << " successors, " - << MBFI->getBlockFreq(Pred) << " freq\n"); + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -641,7 +634,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockChain &Chain = *BlockToChain[*I]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *llvm::prior(Chain.end())) + if (*I != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -690,14 +683,17 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] (" << ExitEdgeFreq << ")\n"); - // Note that we slightly bias this toward an existing layout successor to - // retain incoming order in the absence of better information. - // FIXME: Should we bias this more strongly? It's pretty weak. + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. + BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && - !(ExitEdgeFreq < BestExitEdgeFreq))) { + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } @@ -745,7 +741,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, PI != PE; ++PI) { BlockChain *PredChain = BlockToChain[*PI]; if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + (!PredChain || *PI == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -755,7 +751,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), SE = Bottom->succ_end(); SI != SE; ++SI) { @@ -771,7 +767,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; - std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); + std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. 
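rotateLoop keeps the same rotation trick, now spelled with std::next: the chain is rotated so the block after the chosen exiting block becomes the new head, leaving the exit at the bottom of the loop chain. A toy version on a vector of block numbers:

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

int main() {
  // Chain of block numbers; pretend block 2 is the best exiting block.
  std::vector<int> LoopChain = {0, 1, 2, 3, 4};
  auto ExitIt = std::find(LoopChain.begin(), LoopChain.end(), 2);

  // Same shape as rotateLoop: everything up to and including the exit moves
  // to the back, so the exiting block becomes the last block in the chain.
  if (ExitIt != LoopChain.end())
    std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());

  for (int BB : LoopChain)
    std::printf("%d ", BB);   // prints: 3 4 0 1 2
  std::printf("\n");
  return 0;
}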
@@ -891,7 +887,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(llvm::next(FI)); + MachineFunction::iterator NextFI(std::next(FI)); MachineBasicBlock *NextBB = NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. @@ -939,7 +935,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { BlockChain &FunctionChain = *BlockToChain[&F.front()]; buildChain(&F.front(), FunctionChain, BlockWorkList); +#ifndef NDEBUG typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; +#endif DEBUG({ // Crash at the end so we get all of the debugging output first. bool BadFunc = false; @@ -983,7 +981,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. if (BI == FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -1055,7 +1053,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), + for (BlockChain::iterator BI = std::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { // Don't align non-looping basic blocks. These are unlikely to execute @@ -1081,7 +1079,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *llvm::prior(BI); + MachineBasicBlock *LayoutPred = *std::prev(BI); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. @@ -1103,7 +1101,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) + return false; + + if (skipOptnoneFunction(*F.getFunction())) return false; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); @@ -1149,9 +1150,9 @@ public: initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.setPreservesAll(); @@ -1171,7 +1172,7 @@ INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. 
- if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) return false; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index ae70912..1d6879b 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -77,8 +77,9 @@ getEdgeWeight(const MachineBasicBlock *Src, return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); } -bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +bool +MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% // FIXME: Compare against a static "hot" BranchProbability. return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); @@ -103,9 +104,8 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { return 0; } -BranchProbability -MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { uint32_t Scale = 1; uint32_t D = getSumForBlock(Src, Scale); uint32_t N = getEdgeWeight(Src, Dst) / Scale; @@ -113,13 +113,13 @@ MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, return BranchProbability(N, D); } -raw_ostream &MachineBranchProbabilityInfo:: -printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( + raw_ostream &OS, const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() - << " probability is " << Prob + << " probability is " << Prob << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); return OS; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index d228286..9c3bcc4 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -49,9 +49,9 @@ namespace { initializeMachineCSEPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); @@ -60,7 +60,7 @@ namespace { AU.addPreserved<MachineDominatorTree>(); } - virtual void releaseMemory() { + void releaseMemory() override { ScopeMap.clear(); Exps.clear(); } @@ -131,9 +131,24 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; - if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + if (DefMI->getOperand(0).getSubReg()) continue; - if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + // FIXME: We should trivially coalesce subregister copies to expose CSE + // opportunities on instructions with truncated operands (see + // cse-add-with-overflow.ll). 
This can be done here as follows: + // if (SrcSubReg) + // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + // SrcSubReg); + // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); + // + // The 2-addr pass has been updated to handle coalesced subregs. However, + // some machine-specific code still can't handle it. + // To handle it properly we also need a way find a constrained subregister + // class given a super-reg class and subreg index. + if (DefMI->getOperand(1).getSubReg()) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!MRI->constrainRegClass(SrcReg, RC)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); @@ -214,7 +229,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, // Next, collect all defs into PhysDefs. If any is already in PhysRefs // (which currently contains only uses), set the PhysUseDef flag. PhysUseDef = false; - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + MachineBasicBlock::const_iterator I = MI; I = std::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) @@ -265,7 +280,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, } CrossMBB = true; } - MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); + MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); MachineBasicBlock::const_iterator E = MI; MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; @@ -310,8 +325,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, } bool MachineCSE::isCSECandidate(MachineInstr *MI) { - if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || + MI->isInlineAsm() || MI->isDebugValue()) return false; // Ignore copies. @@ -349,15 +364,11 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, TargetRegisterInfo::isVirtualRegister(Reg)) { MayIncreasePressure = false; SmallPtrSet<MachineInstr*, 8> CSUses; - for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - CSUses.insert(Use); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + CSUses.insert(&MI); } - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - if (!CSUses.count(Use)) { + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (!CSUses.count(&MI)) { MayIncreasePressure = true; break; } @@ -388,11 +399,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, } if (!HasVRegUse) { bool HasNonCopyUse = false; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { // Ignore copies. - if (!Use->isCopyLike()) { + if (!MI.isCopyLike()) { HasNonCopyUse = true; break; } @@ -405,11 +414,9 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // it unless the defined value is already used in the BB of the new use. 
bool HasPHI = false; SmallPtrSet<MachineBasicBlock*, 4> CSBBs; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *Use = &*I; - HasPHI |= Use->isPHI(); - CSBBs.insert(Use->getParent()); + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + HasPHI |= MI.isPHI(); + CSBBs.insert(MI.getParent()); } if (!HasPHI) @@ -513,7 +520,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool DoCSE = true; unsigned NumDefs = MI->getDesc().getNumDefs() + MI->getDesc().getNumImplicitDefs(); - + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) @@ -652,6 +659,9 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 4f48e2c..7e1970c 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -42,7 +42,7 @@ namespace { initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: typedef SmallVector<unsigned, 4> DestList; @@ -127,13 +127,10 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, } // Remove MI from the function because it has been determined it is dead. -// Turn it into a noop KILL instruction if it has super-register liveness -// adjustments. +// Turn it into a noop KILL instruction as opposed to removing it to +// maintain imp-use/imp-def chains. void MachineCopyPropagation::removeCopy(MachineInstr *MI) { - if (MI->getNumOperands() == 2) - MI->eraseFromParent(); - else - MI->setDesc(TII->get(TargetOpcode::KILL)); + MI->setDesc(TII->get(TargetOpcode::KILL)); } bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { @@ -142,6 +139,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map SourceMap SrcMap; // Src -> Def map + DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); + bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; @@ -176,6 +175,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // CALL // %RAX<def> = COPY %RSP + DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump()); + // Clear any kills of Def between CopyMI and MI. This extends the // live range. for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) @@ -191,10 +192,14 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If Src is defined by a previous copy, it cannot be eliminated. for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); MaybeDeadCopies.remove(CI->second); + } } + DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); + // Copy is now a candidate for deletion. 
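The MachineCopyPropagation hunks above all revolve around the MaybeDeadCopies set: every copy starts out as a deletion candidate, any later read of its destination (or of an aliasing register) rescues it, and whatever survives to a clobber point is removed. A minimal standalone sketch of that pattern, using toy Copy records rather than MachineInstr (all names here are illustrative only):

#include <cstdio>
#include <set>
#include <vector>

// Toy stand-in for a register copy instruction: Dst = COPY Src.
struct Copy { int Dst, Src; };

int main() {
  // A tiny "block": two copies, then a read of register 1 further down.
  std::vector<Copy> Copies = {{1, 2}, {3, 4}};
  std::vector<int> LaterReads = {1};
  std::set<Copy *> MaybeDead;

  // Every copy is provisionally dead when first seen.
  for (Copy &C : Copies)
    MaybeDead.insert(&C);

  // A later read of a copy's destination means the copy is needed after all.
  for (int Reg : LaterReads)
    for (Copy &C : Copies)
      if (C.Dst == Reg)
        MaybeDead.erase(&C);

  // Whatever is still in the set at a clobber point (call/regmask) is dead.
  for (Copy *C : MaybeDead)
    std::printf("dead copy: %d = COPY %d\n", C->Dst, C->Src);
  return 0;
}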
MaybeDeadCopies.insert(MI); @@ -255,8 +260,10 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // for elimination. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI); - if (CI != CopyMap.end()) + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); MaybeDeadCopies.remove(CI->second); + } } } @@ -273,6 +280,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { unsigned Reg = (*DI)->getOperand(0).getReg(); if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) continue; + DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; + (*DI)->dump()); removeCopy(*DI); Changed = true; ++NumDeletes; @@ -320,6 +329,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + bool Changed = false; TRI = MF.getTarget().getRegisterInfo(); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 0703df0..061efdb 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,8 +25,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -140,7 +139,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { // Figure out the block number this should have. unsigned BlockNo = 0; if (MBBI != begin()) - BlockNo = prior(MBBI)->getNumber()+1; + BlockNo = std::prev(MBBI)->getNumber() + 1; for (; MBBI != E; ++MBBI, ++BlockNo) { if (MBBI->getNumber() != (int)BlockNo) { @@ -347,7 +346,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << PrintReg(I->first, TRI); if (I->second) OS << " in " << PrintReg(I->second, TRI); - if (llvm::next(I) != E) + if (std::next(I) != E) OS << ", "; } OS << '\n'; @@ -425,7 +424,16 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { - assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg); + (void)VRegRC; + // A physical register can be added several times. + // Between two calls, the register class of the related virtual register + // may have been constrained to match some operation constraints. + // In that case, check that the current register class includes the + // physical register and is a sub class of the specified RC. + assert((VRegRC == RC || (VRegRC->contains(PReg) && + RC->hasSubClassEq(VRegRC))) && + "Register class mismatch!"); return VReg; } VReg = MRI.createVirtualRegister(RC); @@ -438,12 +446,12 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. 
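The addLiveIn hunk above relaxes an exact register-class equality assert: a physical register may be registered as a live-in more than once, and in between the virtual register's class may have been constrained, so the check now only requires that the constrained class still contains the physical register and refines the requested class. A toy version of that acceptance test, where RegClass is a plain set of registers rather than TargetRegisterClass:

#include <algorithm>
#include <cassert>
#include <vector>

// Toy register class: simply the set of physical registers it covers.
struct RegClass {
  std::vector<int> Regs;
  bool contains(int PReg) const {
    return std::find(Regs.begin(), Regs.end(), PReg) != Regs.end();
  }
  // "Subclass" in this toy model just means a subset of the other class.
  bool isSubClassOf(const RegClass &Super) const {
    for (int R : Regs)
      if (!Super.contains(R))
        return false;
    return true;
  }
};

// Mirrors the relaxed assert: either the classes match exactly, or the
// constrained class still holds PReg and refines the class asked for.
bool liveInClassIsCompatible(const RegClass &VRegRC, const RegClass &RC,
                             int PReg) {
  return &VRegRC == &RC || (VRegRC.contains(PReg) && VRegRC.isSubClassOf(RC));
}

int main() {
  RegClass GPR{{1, 2, 3, 4}};
  RegClass GPRLow{{1, 2}}; // class constrained between two addLiveIn calls
  assert(liveInClassIsCompatible(GPRLow, GPR, /*PReg=*/2));
  assert(!liveInClassIsCompatible(GPRLow, GPR, /*PReg=*/3));
  return 0;
}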
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { + const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); - const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : - MAI.getPrivateGlobalPrefix(); + const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() : + DL->getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -453,8 +461,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *MachineFunction::getPICBaseSymbol() const { - const MCAsmInfo &MAI = *Target.getMCAsmInfo(); - return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + const DataLayout *DL = getTarget().getDataLayout(); + return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -490,14 +498,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, /// a nonnegative identifier to represent it. /// int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { + bool isSS, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, - Alloca)); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -514,7 +521,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - CreateStackObject(Size, Alignment, true, false); + CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); return Index; @@ -525,13 +532,14 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// variable sized object is created, whether or not the index returned is /// actually used. 
/// -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, + const AllocaInst *Alloca) { HasVarSizedObjects = true; Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } @@ -556,7 +564,6 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, - /*NeedSP*/ false, /*Alloca*/ 0)); return -++NumFixedObjects; } @@ -910,7 +917,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); + Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 674cc80..789f204 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -51,6 +51,7 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved("domfrontier"); AU.addPreserved("loops"); AU.addPreserved("lda"); + AU.addPreserved("stack-protector"); FunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index fa9c821..dee3977 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -34,14 +34,14 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) : MachineFunctionPass(ID), OS(os), Banner(banner) {} - const char *getPassName() const { return "MachineFunction Printer"; } + const char *getPassName() const override { return "MachineFunction Printer"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { OS << "# " << Banner << ":\n"; MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); return false; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 295b450..d102794 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -15,15 +15,14 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" @@ -199,10 +198,13 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) 
const { case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress() && getOffset() == Other.getOffset(); - case MO_RegisterMask: + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_CFIIndex: + return getCFIIndex() == Other.getCFIIndex(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); } @@ -241,11 +243,14 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_Metadata: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); case MachineOperand::MO_MCSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + case MachineOperand::MO_CFIIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); } llvm_unreachable("Invalid machine operand type"); } @@ -312,7 +317,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "tied"; if (TiedTo != 15) OS << unsigned(TiedTo - 1); - NeedComma = true; } OS << '>'; } @@ -350,7 +354,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { break; case MachineOperand::MO_GlobalAddress: OS << "<ga:"; - WriteAsOperand(OS, getGlobal(), /*PrintType=*/false); + getGlobal()->printAsOperand(OS, /*PrintType=*/false); if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; @@ -361,21 +365,27 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { break; case MachineOperand::MO_BlockAddress: OS << '<'; - WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); + getBlockAddress()->printAsOperand(OS, /*PrintType=*/false); if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; case MachineOperand::MO_RegisterMask: OS << "<regmask>"; break; + case MachineOperand::MO_RegisterLiveOut: + OS << "<regliveout>"; + break; case MachineOperand::MO_Metadata: OS << '<'; - WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); + getMetadata()->printAsOperand(OS, /*PrintType=*/false); OS << '>'; break; case MachineOperand::MO_MCSymbol: OS << "<MCSym=" << *getMCSymbol() << '>'; break; + case MachineOperand::MO_CFIIndex: + OS << "<call frame instruction>"; + break; } if (unsigned TF = getTargetFlags()) @@ -479,7 +489,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { if (!MMO.getValue()) OS << "<unknown>"; else - WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + MMO.getValue()->printAsOperand(OS, /*PrintType=*/false); + + unsigned AS = MMO.getAddrSpace(); + if (AS != 0) + OS << "(addrspace=" << AS << ')'; // If the alignment of the memory reference itself differs from the alignment // of the base pointer, print the base alignment explicitly, next to the base @@ -500,7 +514,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { OS << "(tbaa="; if (TBAAInfo->getNumOperands() > 0) - WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false); + TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); else OS << "<unknown>"; OS << ")"; @@ -984,6 +998,54 @@ 
MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( + unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, bool ExploreBundle) const { + // Check every operand inside the bundle if we have + // been asked to. + if (ExploreBundle) + for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC; + ++OpndIt) + CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl( + OpndIt.getOperandNo(), Reg, CurRC, TII, TRI); + else + // Otherwise, just check the current operands. + for (ConstMIOperands OpndIt(this); OpndIt.isValid() && CurRC; ++OpndIt) + CurRC = getRegClassConstraintEffectForVRegImpl(OpndIt.getOperandNo(), Reg, + CurRC, TII, TRI); + return CurRC; +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( + unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + assert(CurRC && "Invalid initial register class"); + // Check if Reg is constrained by some of its use/def from MI. + const MachineOperand &MO = getOperand(OpIdx); + if (!MO.isReg() || MO.getReg() != Reg) + return CurRC; + // If yes, accumulate the constraints through the operand. + return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI); +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( + unsigned OpIdx, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI); + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isReg() && + "Cannot get register constraints for non-register operand"); + assert(CurRC && "Invalid initial register class"); + if (unsigned SubIdx = MO.getSubReg()) { + if (OpRC) + CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx); + else + CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx); + } else if (OpRC) + CurRC = TRI->getCommonSubClass(CurRC, OpRC); + return CurRC; +} + /// Return the number of instructions inside the MI bundle, not counting the /// header instruction. unsigned MachineInstr::getBundleSize() const { @@ -1239,8 +1301,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return false; } - if (isLabel() || isDebugValue() || - isTerminator() || hasUnmodeledSideEffects()) + if (isPosition() || isDebugValue() || isTerminator() || + hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1372,7 +1434,7 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) + if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask()) addOperand(MF, MO); } } @@ -1587,7 +1649,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); i != e; ++i) { OS << **i; - if (llvm::next(i) != e) + if (std::next(i) != e) OS << " "; } } @@ -1612,7 +1674,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // Print debug location information.
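getRegClassConstraintEffectForVReg, added in the hunk above, walks the operands (optionally across a whole bundle) and repeatedly narrows a starting register class until it stabilizes or becomes null. Stripped of the TargetRegisterInfo queries, that accumulation is just an intersection folded over the constraining operands, which a toy model can show (treating a register class as a plain set of registers is purely illustrative):

#include <cstdio>
#include <set>
#include <vector>

using RegClass = std::set<int>; // toy: a register class is a set of registers

// Narrow CurRC by one operand's constraint. An empty result plays the role of
// the null TargetRegisterClass returned when constraints are incompatible.
RegClass applyConstraint(const RegClass &CurRC, const RegClass &OpRC) {
  RegClass Out;
  for (int R : CurRC)
    if (OpRC.count(R))
      Out.insert(R);
  return Out;
}

int main() {
  RegClass CurRC = {1, 2, 3, 4, 5};            // initial class of the vreg
  std::vector<RegClass> OperandConstraints = { // one entry per constraining operand
      {1, 2, 3, 4}, {2, 3, 4, 5}, {2, 3}};

  for (const RegClass &OpRC : OperandConstraints) {
    CurRC = applyConstraint(CurRC, OpRC);
    if (CurRC.empty())
      break; // no register satisfies every use/def constraint
  }
  std::printf("%zu registers remain after accumulation\n", CurRC.size());
  return 0;
}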
if (isDebugValue() && getOperand(e - 1).isMetadata()) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) OS << ";"; DIVariable DV(getOperand(e - 1).getMetadata()); OS << " line no:" << DV.getLineNumber(); if (MDNode *InlinedAt = DV.getInlinedAt()) { @@ -1624,7 +1686,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, } } } else if (!debugLoc.isUnknown() && MF) { - if (!HaveSemi) OS << ";"; HaveSemi = true; + if (!HaveSemi) OS << ";"; OS << " dbg:"; printDebugLoc(debugLoc, MF, OS); } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 77bcd1d..962169e 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -26,7 +26,7 @@ namespace { initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace @@ -77,7 +77,7 @@ namespace { initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; }; } // end anonymous namespace @@ -211,7 +211,7 @@ MachineBasicBlock::instr_iterator llvm::finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI) { MachineBasicBlock::instr_iterator E = MBB.instr_end(); - MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI); + MachineBasicBlock::instr_iterator LastMI = std::next(FirstMI); while (LastMI != E && LastMI->isInsideBundle()) ++LastMI; finalizeBundle(MBB, FirstMI, LastMI); @@ -235,7 +235,7 @@ bool llvm::finalizeBundles(MachineFunction &MF) { if (!MII->isInsideBundle()) ++MII; else { - MII = finalizeBundle(MBB, llvm::prior(MII)); + MII = finalizeBundle(MBB, std::prev(MII)); Changed = true; } } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 104eacd..d3a1ee7 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -125,9 +125,9 @@ namespace { initializeMachineLICMPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<AliasAnalysis>(); @@ -136,7 +136,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - virtual void releaseMemory() { + void releaseMemory() override { RegSeen.clear(); RegPressure.clear(); RegLimit.clear(); @@ -319,6 +319,9 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { } bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); @@ -978,25 +981,23 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { unsigned Reg = MO->getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { // A PHI may cause a copy to be inserted. - if (UseMI->isPHI()) { + if (UseMI.isPHI()) { // A PHI inside the loop causes a copy because the live range of Reg is // extended across the PHI. 
- if (CurLoop->contains(UseMI)) + if (CurLoop->contains(&UseMI)) return true; // A PHI in an exit block can cause a copy to be inserted if the PHI // has multiple predecessors in the loop with different values. // For now, approximate by rejecting all exit blocks. - if (isExitBlock(UseMI->getParent())) + if (isExitBlock(UseMI.getParent())) return true; continue; } // Look past copies as well. - if (UseMI->isCopy() && CurLoop->contains(UseMI)) - Work.push_back(UseMI); + if (UseMI.isCopy() && CurLoop->contains(&UseMI)) + Work.push_back(&UseMI); } } } while (!Work.empty()); @@ -1011,22 +1012,20 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) return false; - for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), - E = MRI->use_nodbg_end(); I != E; ++I) { - MachineInstr *UseMI = &*I; - if (UseMI->isCopyLike()) + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { + if (UseMI.isCopyLike()) continue; - if (!CurLoop->contains(UseMI->getParent())) + if (!CurLoop->contains(UseMI.getParent())) continue; - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); + for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned MOReg = MO.getReg(); if (MOReg != Reg) continue; - if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) + if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i)) return true; } diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 4e2cfdc..89054d4 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -50,11 +50,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *TopMBB = getHeader(); MachineFunction::iterator Begin = TopMBB->getParent()->begin(); if (TopMBB != Begin) { - MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB)); + MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); while (contains(PriorMBB)) { TopMBB = PriorMBB; if (TopMBB == Begin) break; - PriorMBB = prior(MachineFunction::iterator(TopMBB)); + PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); } } return TopMBB; @@ -63,12 +63,12 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); - if (BotMBB != prior(End)) { - MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + if (BotMBB != std::prev(End)) { + MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB)); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break; - NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); + if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break; + NextMBB = std::next(MachineFunction::iterator(BotMBB)); } } return BotMBB; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index bb54284..7181025 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -45,8 +45,8 @@ public: void setMap(MMIAddrLabelMap *map) { Map = map; } - virtual void deleted(); - virtual void allUsesReplacedWith(Value *V2); + void deleted() override; + void allUsesReplacedWith(Value *V2) override; }; class 
MMIAddrLabelMap { @@ -313,7 +313,7 @@ void MachineModuleInfo::EndFunction() { CallsEHReturn = 0; CallsUnwindInit = 0; CompactUnwindEncoding = 0; - VariableDbgInfo.clear(); + VariableDbgInfos.clear(); } /// AnalyzeModule - Scan the module for global debug information. diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f8b8796..db3eec3 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -13,9 +13,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/raw_os_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/raw_os_ostream.h" using namespace llvm; @@ -77,19 +77,12 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { return false; // Accumulate constraints from all uses. - for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; - ++I) { - const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, - getTargetRegisterInfo()); - if (unsigned SubIdx = I.getOperand().getSubReg()) { - if (OpRC) - NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, - SubIdx); - else - NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); - } else if (OpRC) - NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); + for (MachineOperand &MO : reg_nodbg_operands(Reg)) { + // Apply the effect of the given operand to NewRC. + MachineInstr *MI = MO.getParent(); + unsigned OpNo = &MO - &MI->getOperand(0); + NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII, + getTargetRegisterInfo()); if (!NewRC || NewRC == OldRC) return false; } @@ -133,8 +126,8 @@ void MachineRegisterInfo::clearVirtRegs() { void MachineRegisterInfo::verifyUseList(unsigned Reg) const { #ifndef NDEBUG bool Valid = true; - for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { - MachineOperand *MO = &I.getOperand(); + for (MachineOperand &M : reg_operands(Reg)) { + MachineOperand *MO = &M; MachineInstr *MI = MO->getParent(); if (!MI) { errs() << PrintReg(Reg, getTargetRegisterInfo()) @@ -295,7 +288,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { // TODO: This could be more efficient by bulk changing the operands. for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); + MachineOperand &O = *I; ++I; O.setReg(ToReg); } @@ -307,8 +300,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - def_iterator I = def_begin(Reg); - assert((I.atEnd() || llvm::next(I) == def_end()) && + def_instr_iterator I = def_instr_begin(Reg); + assert((I.atEnd() || std::next(I) == def_instr_end()) && "getVRegDef assumes a single definition or no definition"); return !I.atEnd() ? &*I : 0; } @@ -318,8 +311,8 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { /// multiple definitions or no definition, return null. 
MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { if (def_empty(Reg)) return 0; - def_iterator I = def_begin(Reg); - if (llvm::next(I) != def_end()) + def_instr_iterator I = def_instr_begin(Reg); + if (std::next(I) != def_instr_end()) return 0; return &*I; } @@ -336,8 +329,8 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { /// optimization passes which extend register lifetimes and need only /// preserve conservative kill flag information. void MachineRegisterInfo::clearKillFlags(unsigned Reg) const { - for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI) - UI.getOperand().setIsKill(false); + for (MachineOperand &MO : use_operands(Reg)) + MO.setIsKill(false); } bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { @@ -399,8 +392,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { - for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) - I.getOperand().getParent()->dump(); + for (MachineInstr &I : use_instructions(Reg)) + I.dump(); } #endif @@ -422,3 +415,18 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, return false; return true; } + +/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the +/// specified register as undefined which causes the DBG_VALUE to be +/// deleted during LiveDebugVariables analysis. +void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { + // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.) + MachineRegisterInfo::use_instr_iterator nextI; + for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end(); + I != E; I = nextI) { + nextI = std::next(I); // I is invalidated by the setReg + MachineInstr *UseMI = &*I; + if (UseMI->isDebugValue()) + UseMI->getOperand(0).setReg(0U); + } +} diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 17f0af8..77496ad 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -230,16 +230,6 @@ void MachineSSAUpdater::RewriteUse(MachineOperand &U) { U.setReg(NewVR); } -void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { - MRI->replaceRegWith(OldReg, NewReg); - - AvailableValsTy &AvailableVals = getAvailableVals(AV); - for (DenseMap<MachineBasicBlock*, unsigned>::iterator - I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I) - if (I->second == OldReg) - I->second = NewReg; -} - /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl /// template, specialized for MachineSSAUpdater. 
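markUsesInDebugValueAsUndef, added near the end of the MachineRegisterInfo.cpp hunk above, has to compute std::next(I) before touching the operand, because setReg unlinks the use from the very list being walked. The same save-the-successor idiom over a std::list (purely illustrative, not the LLVM use-list type):

#include <cstdio>
#include <iterator>
#include <list>

int main() {
  std::list<int> Uses = {10, 11, 12, 13};

  // Erasing (or re-linking) the current element invalidates I, so grab the
  // successor first -- the same reason the LLVM loop keeps nextI around.
  for (auto I = Uses.begin(), E = Uses.end(); I != E;) {
    auto NextI = std::next(I);
    if (*I % 2 == 0)
      Uses.erase(I); // stands in for "unlink this use from the use list"
    I = NextI;
  }

  for (int U : Uses)
    std::printf("%d ", U); // prints: 11 13
  std::printf("\n");
  return 0;
}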
namespace llvm { diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index e71c4df..d90cd23 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "misched" #include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -49,6 +48,11 @@ static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); + +static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, + cl::desc("Only schedule this function")); +static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, + cl::desc("Only schedule this MBB#")); #else static bool ViewMISchedDAGs = false; #endif // NDEBUG @@ -90,24 +94,46 @@ MachineSchedContext::~MachineSchedContext() { } namespace { +/// Base class for a machine scheduler class that can run at any point. +class MachineSchedulerBase : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} + + void print(raw_ostream &O, const Module* = 0) const override; + +protected: + void scheduleRegions(ScheduleDAGInstrs &Scheduler); +}; + /// MachineScheduler runs after coalescing and before register allocation. -class MachineScheduler : public MachineSchedContext, - public MachineFunctionPass { +class MachineScheduler : public MachineSchedulerBase { public: MachineScheduler(); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction&) override; + + static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); +}; - virtual void releaseMemory() {} +/// PostMachineScheduler runs shortly before code emission.
+class PostMachineScheduler : public MachineSchedulerBase { +public: + PostMachineScheduler(); - virtual bool runOnMachineFunction(MachineFunction&); + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual void print(raw_ostream &O, const Module* = 0) const; + bool runOnMachineFunction(MachineFunction&) override; static char ID; // Class identification, replacement for typeinfo protected: - ScheduleDAGInstrs *createMachineScheduler(); + ScheduleDAGInstrs *createPostMachineScheduler(); }; } // namespace @@ -124,7 +150,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() -: MachineFunctionPass(ID) { +: MachineSchedulerBase(ID) { initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -141,6 +167,26 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +char PostMachineScheduler::ID = 0; + +char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; + +INITIALIZE_PASS(PostMachineScheduler, "postmisched", + "PostRA Machine Instruction Scheduler", false, false) + +PostMachineScheduler::PostMachineScheduler() +: MachineSchedulerBase(ID) { + initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + MachinePassRegistry MachineSchedRegistry::Registry; /// A dummy default scheduler factory indicates whether the scheduler @@ -162,8 +208,8 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); - +static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. static MachineBasicBlock::const_iterator @@ -222,7 +268,20 @@ ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { return Scheduler; // Default to GenericScheduler. - return createGenericSched(this); + return createGenericSchedLive(this); +} + +/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by +/// the caller. We don't have a command line option to override the postRA +/// scheduler. The Target must configure it. +ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { + // Get the postRA scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSchedPostRA(this); } /// Top-level MachineScheduler pass driver. @@ -252,7 +311,6 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { AA = &getAnalysis<AliasAnalysis>(); LIS = &getAnalysis<LiveIntervals>(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { DEBUG(LIS->dump()); @@ -262,7 +320,60 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. 
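The MachineScheduler.cpp hunks above restructure the pass so that a common MachineSchedulerBase owns the block-and-region-walking driver (scheduleRegions), while MachineScheduler and PostMachineScheduler differ only in which ScheduleDAGInstrs they instantiate. The shape of that refactor, reduced to a standalone sketch with illustrative names rather than the LLVM classes:

#include <cstdio>
#include <memory>

// Toy stand-in for ScheduleDAGInstrs.
struct Scheduler {
  virtual ~Scheduler() = default;
  virtual const char *name() const = 0;
};
struct PreRASched : Scheduler {
  const char *name() const override { return "pre-RA"; }
};
struct PostRASched : Scheduler {
  const char *name() const override { return "post-RA"; }
};

// Shared driver: owns the block/region walk, knows nothing about liveness.
struct SchedulerPassBase {
  virtual ~SchedulerPassBase() = default;
  virtual std::unique_ptr<Scheduler> createScheduler() = 0;
  void run() {
    std::unique_ptr<Scheduler> S = createScheduler();
    std::printf("scheduling regions with the %s scheduler\n", S->name());
  }
};

// The two concrete passes differ only in the scheduler they create.
struct PreRAPass : SchedulerPassBase {
  std::unique_ptr<Scheduler> createScheduler() override {
    return std::unique_ptr<Scheduler>(new PreRASched());
  }
};
struct PostRAPass : SchedulerPassBase {
  std::unique_ptr<Scheduler> createScheduler() override {
    return std::unique_ptr<Scheduler>(new PostRASched());
  }
};

int main() {
  PreRAPass().run();
  PostRAPass().run();
  return 0;
}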
- OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + scheduleRegions(*Scheduler); + + DEBUG(LIS->dump()); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); + return true; +} + +bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (skipOptnoneFunction(*mf.getFunction())) + return false; + + DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); + + // Initialize the context of the pass. + MF = &mf; + PassConfig = &getAnalysis<TargetPassConfig>(); + + if (VerifyScheduling) + MF->verify(this, "Before post machine scheduling."); + + // Instantiate the selected scheduler for this target, function, and + // optimization level. + std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); + scheduleRegions(*Scheduler); + + if (VerifyScheduling) + MF->verify(this, "After post machine scheduling."); + return true; +} + +/// Return true if the given instruction should not be included in a scheduling +/// region. +/// +/// MachineScheduler does not currently support scheduling across calls. To +/// handle calls, the DAG builder needs to be modified to create register +/// anti/output dependencies on the registers clobbered by the call's regmask +/// operand. In PreRA scheduling, the stack pointer adjustment already prevents +/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce +/// the boundary, but there would be no benefit to postRA scheduling across +/// calls this late anyway. +static bool isSchedBoundary(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB, + MachineFunction *MF, + const TargetInstrInfo *TII, + bool IsPostRA) { + return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); +} + +/// Main driver for both MachineScheduler and PostMachineScheduler. +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -271,7 +382,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler->startBlock(MBB); + Scheduler.startBlock(MBB); + +#ifndef NDEBUG + if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) + continue; + if (SchedOnlyBlock.getNumOccurrences() + && (int)SchedOnlyBlock != MBB->getNumber()) + continue; +#endif // Break the block into scheduling regions [I, RegionEnd), and schedule each // region as soon as it is discovered. RegionEnd points the scheduling @@ -283,13 +402,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions. So the local iterators 'I' and // 'RegionEnd' are invalid across these calls. - unsigned RemainingInstrs = MBB->size(); + // + // MBB::size() uses instr_iterator to count. Here we need a bundle to count + // as a single instruction. + unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end()); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { // Avoid decrementing RegionEnd for blocks with no terminator.
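The scheduleRegions loop above peels regions off the bottom of each block: RegionEnd starts at the block end, boundary instructions (calls, or whatever isSchedBoundary reports) terminate a region without being scheduled themselves, and each [I, RegionEnd) region is handed to the scheduler as soon as it is found. A self-contained rendering of that walk over a plain vector, with isBoundary standing in for isSchedBoundary:

#include <cstdio>
#include <string>
#include <vector>

int main() {
  // A toy "basic block"; "B" entries mark scheduling boundaries (e.g. calls).
  std::vector<std::string> Block = {"a", "b", "B", "c", "d", "e", "B", "f"};
  auto isBoundary = [](const std::string &I) { return I == "B"; };

  // Walk the block bottom-up, peeling off [I, RegionEnd) scheduling regions.
  size_t RegionEnd = Block.size();
  while (RegionEnd != 0) {
    // Except at the very bottom of the block, RegionEnd sits on a boundary
    // instruction: keep it below the region and do not schedule it.
    if (RegionEnd != Block.size() || isBoundary(Block[RegionEnd - 1]))
      --RegionEnd;

    // Extend the region upward until the next boundary (or the block start).
    size_t I = RegionEnd;
    while (I != 0 && !isBoundary(Block[I - 1]))
      --I;

    if (I != RegionEnd) { // skip empty regions, as the real driver does
      std::printf("region [%zu, %zu):", I, RegionEnd);
      for (size_t K = I; K != RegionEnd; ++K)
        std::printf(" %s", Block[K].c_str());
      std::printf("\n");
    }
    RegionEnd = I; // continue with the instructions above this region
  }
  return 0;
}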
- if (RegionEnd != MBB->end() - || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + if (RegionEnd != MBB->end() || + isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -300,21 +422,22 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { - if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) + if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). - if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + if (I == RegionEnd || I == std::prev(RegionEnd)) { // Close the current region. Bundle the terminator if needed. // This invalidates 'RegionEnd' and 'I'. - Scheduler->exitRegion(); + Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "") + << "MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -325,26 +448,27 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. - Scheduler->schedule(); + Scheduler.schedule(); // Close the current region. - Scheduler->exitRegion(); + Scheduler.exitRegion(); // Scheduling has invalidated the current iterator 'I'. Ask the // scheduler for the top of it's scheduled region. - RegionEnd = Scheduler->begin(); + RegionEnd = Scheduler.begin(); } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); - Scheduler->finishBlock(); + Scheduler.finishBlock(); + if (Scheduler.isPostRA()) { + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception. + Scheduler.fixupKills(MBB); + } } - Scheduler->finalizeSchedule(); - DEBUG(LIS->dump()); - if (VerifyScheduling) - MF->verify(this, "After machine scheduling."); - return true; + Scheduler.finalizeSchedule(); } -void MachineScheduler::print(raw_ostream &O, const Module* m) const { +void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { // unimplemented } @@ -358,12 +482,12 @@ void ReadyQueue::dump() { #endif //===----------------------------------------------------------------------===// -// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals -// preservation. -//===----------------------------------------------------------------------===// +// ScheduleDAGMI - Basic machine instruction scheduling. This is +// independent of PreRA/PostRA scheduling and involves no extra book-keeping for +// virtual registers. +// ===----------------------------------------------------------------------===/ ScheduleDAGMI::~ScheduleDAGMI() { - delete DFSResult; DeleteContainerPointers(Mutations); delete SchedImpl; } @@ -453,10 +577,24 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { } } +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. 
[begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) +{ + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + + SchedImpl->initPolicy(begin, end, regioninstrs); +} + /// This is normally called from the main scheduler loop but may also be invoked /// by the scheduling strategy to perform additional code motion. -void ScheduleDAGMI::moveInstruction(MachineInstr *MI, - MachineBasicBlock::iterator InsertPos) { +void ScheduleDAGMI::moveInstruction( + MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { // Advance RegionBegin if the first instruction moves down. if (&*RegionBegin == MI) ++RegionBegin; @@ -465,7 +603,8 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI, BB->splice(InsertPos, BB, MI); // Update LiveIntervals - LIS->handleMove(MI, /*UpdateFlags=*/true); + if (LIS) + LIS->handleMove(MI, /*UpdateFlags=*/true); // Recede RegionBegin if an instruction moves above the first. if (RegionBegin == InsertPos) @@ -483,31 +622,212 @@ bool ScheduleDAGMI::checkSchedLimit() { return true; } +/// Per-region scheduling driver, called back from +/// MachineScheduler::runOnMachineFunction. This is a simplified driver that +/// does not consider liveness or register pressure. It is useful for PostRA +/// scheduling and potentially other custom schedulers. +void ScheduleDAGMI::schedule() { + // Build the DAG. + buildSchedGraph(AA); + + Topo.InitDAGTopologicalSorting(); + + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + if (ViewMISchedDAGs) viewGraph(); + + // Initialize ready queues now that the DAG and priority data are finalized. + initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + assert(!SU->isScheduled && "Node already scheduled"); + if (!checkSchedLimit()) + break; + + MachineInstr *MI = SU->getInstr(); + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else + moveInstruction(MI, CurrentTop); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; + else { + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, priorII); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + } + updateQueues(SU, IsTopNode); + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +/// Apply each ScheduleDAGMutation step in order. 
+void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + +void ScheduleDAGMI:: +findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, + SmallVectorImpl<SUnit*> &BotRoots) { + for (std::vector<SUnit>::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + + // Order predecessors so DFSResult follows the critical path. + SU->biasCriticalPath(); + + // A SUnit is ready to top schedule if it has no predecessors. + if (!I->NumPredsLeft) + TopRoots.push_back(SU); + // A SUnit is ready to bottom schedule if it has no successors. + if (!I->NumSuccsLeft) + BotRoots.push_back(SU); + } + ExitSU.biasCriticalPath(); +} + +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, + ArrayRef<SUnit*> BotRoots) { + NextClusterSucc = NULL; + NextClusterPred = NULL; + + // Release all DAG roots for scheduling, not including EntrySU/ExitSU. + // + // Nodes with unreleased weak edges can still be roots. + // Release top roots in forward order. + for (SmallVectorImpl<SUnit*>::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } + + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + SchedImpl->registerRoots(); + + // Advance past initial DebugValues. + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); + CurrentBottom = RegionEnd; +} + +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void ScheduleDAGMI::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + if (&*RegionBegin == DbgValue) + ++RegionBegin; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == std::prev(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = NULL; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + +//===----------------------------------------------------------------------===// +// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals +// preservation. 
+//===----------------------------------------------------------------------===// + +ScheduleDAGMILive::~ScheduleDAGMILive() { + delete DFSResult; +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. [begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions /// that don't get scheduled. -void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, +void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + // ScheduleDAGMI initializes SchedImpl's per-region policy. + ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. - LiveRegionEnd = - (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); + LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd); SUPressureDiffs.clear(); - SchedImpl->initPolicy(begin, end, regioninstrs); - ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. -void ScheduleDAGMI::initRegPressure() { +void ScheduleDAGMILive::initRegPressure() { TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); @@ -567,7 +887,7 @@ void ScheduleDAGMI::initRegPressure() { dbgs() << "\n"); } -void ScheduleDAGMI:: +void ScheduleDAGMILive:: updateScheduledPressure(const SUnit *SU, const std::vector<unsigned> &NewMaxPressure) { const PressureDiff &PDiff = getPressureDiff(SU); @@ -595,7 +915,7 @@ updateScheduledPressure(const SUnit *SU, /// Update the PressureDiff array for liveness after scheduling this /// instruction. -void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { +void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { /// FIXME: Currently assuming single-use physregs. unsigned Reg = LiveUses[LUIdx]; @@ -644,9 +964,9 @@ void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { /// so that it can be easilly extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses -/// ScheduleDAGMI then it will want to override this virtual method in order to -/// update any specialized state. -void ScheduleDAGMI::schedule() { +/// ScheduleDAGMILive then it will want to override this virtual method in order +/// to update any specialized state. +void ScheduleDAGMILive::schedule() { buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -667,6 +987,11 @@ void ScheduleDAGMI::schedule() { // Initialize ready queues now that the DAG and priority data are finalized. 
initQueues(TopRoots, BotRoots); + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } + bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { assert(!SU->isScheduled && "Node already scheduled"); @@ -676,6 +1001,18 @@ void ScheduleDAGMI::schedule() { scheduleMI(SU, IsTopNode); updateQueues(SU, IsTopNode); + + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -690,7 +1027,7 @@ void ScheduleDAGMI::schedule() { } /// Build the DAG and setup three register pressure trackers. -void ScheduleDAGMI::buildDAGWithRegPressure() { +void ScheduleDAGMILive::buildDAGWithRegPressure() { if (!ShouldTrackPressure) { RPTracker.reset(); RegionCriticalPSets.clear(); @@ -713,14 +1050,7 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { initRegPressure(); } -/// Apply each ScheduleDAGMutation step in order. -void ScheduleDAGMI::postprocessDAG() { - for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { - Mutations[i]->apply(this); - } -} - -void ScheduleDAGMI::computeDFSResult() { +void ScheduleDAGMILive::computeDFSResult() { if (!DFSResult) DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); DFSResult->clear(); @@ -730,26 +1060,6 @@ void ScheduleDAGMI::computeDFSResult() { ScheduledTrees.resize(DFSResult->getNumSubtrees()); } -void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, - SmallVectorImpl<SUnit*> &BotRoots) { - for (std::vector<SUnit>::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - SUnit *SU = &(*I); - assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); - - // Order predecessors so DFSResult follows the critical path. - SU->biasCriticalPath(); - - // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft) - TopRoots.push_back(SU); - // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft) - BotRoots.push_back(SU); - } - ExitSU.biasCriticalPath(); -} - /// Compute the max cyclic critical path through the DAG. The scheduling DAG /// only provides the critical path for single block loops. To handle loops that /// span blocks, we could use the vreg path latencies provided by @@ -773,7 +1083,10 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, /// LiveOutDepth - LiveInDepth = 3 - 1 = 2 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2 /// CyclicCriticalPath = min(2, 2) = 2 -unsigned ScheduleDAGMI::computeCyclicCriticalPath() { +/// +/// This could be relevant to PostRA scheduling, but is currently implemented +/// assuming LiveIntervals. +unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // This only applies to single block loop. if (!BB->isSuccessor(BB)) return 0; @@ -835,44 +1148,8 @@ unsigned ScheduleDAGMI::computeCyclicCriticalPath() { return MaxCyclicLatency; } -/// Identify DAG roots and setup scheduler queues. -void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, - ArrayRef<SUnit*> BotRoots) { - NextClusterSucc = NULL; - NextClusterPred = NULL; - - // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 
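The computeCyclicCriticalPath comment above works through a concrete example: for each value that is live out of the block and feeds a live-in of the next loop iteration, the cyclic latency contribution is min(LiveOutDepth - LiveInDepth, LiveInHeight - LiveOutHeight), and the block's cyclic critical path is the maximum over all such pairs. The arithmetic, with the numbers from that comment, as a tiny sketch:

#include <algorithm>
#include <cassert>
#include <vector>

// One live-out/live-in pair; the four values come from the scheduling DAG.
struct LiveCycle {
  unsigned LiveOutDepth, LiveInDepth;
  unsigned LiveInHeight, LiveOutHeight;
};

// min(depth slack, height slack) for a single pair.
unsigned cyclicLatency(const LiveCycle &C) {
  unsigned DepthDiff = C.LiveOutDepth - C.LiveInDepth;
  unsigned HeightDiff = C.LiveInHeight - C.LiveOutHeight;
  return std::min(DepthDiff, HeightDiff);
}

int main() {
  // From the comment: LiveOutDepth - LiveInDepth = 3 - 1 = 2,
  //                   LiveInHeight - LiveOutHeight = 4 - 2 = 2,
  //                   CyclicCriticalPath = min(2, 2) = 2.
  std::vector<LiveCycle> Pairs = {{3, 1, 4, 2}};

  unsigned MaxCyclicLatency = 0;
  for (const LiveCycle &C : Pairs)
    MaxCyclicLatency = std::max(MaxCyclicLatency, cyclicLatency(C));
  assert(MaxCyclicLatency == 2);
  return 0;
}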
- // - // Nodes with unreleased weak edges can still be roots. - // Release top roots in forward order. - for (SmallVectorImpl<SUnit*>::const_iterator - I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { - SchedImpl->releaseTopNode(*I); - } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl<SUnit*>::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { - SchedImpl->releaseBottomNode(*I); - } - - releaseSuccessors(&EntrySU); - releasePredecessors(&ExitSU); - - SchedImpl->registerRoots(); - - // Advance past initial DebugValues. - CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - CurrentBottom = RegionEnd; - - if (ShouldTrackPressure) { - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); - TopRPTracker.setPos(CurrentTop); - } -} - /// Move an instruction and update register pressure. -void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { +void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Move the instruction to its new location in the instruction stream. MachineInstr *MI = SU->getInstr(); @@ -917,63 +1194,6 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { } } -/// Update scheduler queues after scheduling an instruction. -void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { - // Release dependent instructions for scheduling. - if (IsTopNode) - releaseSuccessors(SU); - else - releasePredecessors(SU); - - SU->isScheduled = true; - - if (DFSResult) { - unsigned SubtreeID = DFSResult->getSubtreeID(SU); - if (!ScheduledTrees.test(SubtreeID)) { - ScheduledTrees.set(SubtreeID); - DFSResult->scheduleTree(SubtreeID); - SchedImpl->scheduleTree(SubtreeID); - } - } - - // Notify the scheduling strategy after updating the DAG. - SchedImpl->schedNode(SU, IsTopNode); -} - -/// Reinsert any remaining debug_values, just like the PostRA scheduler. -void ScheduleDAGMI::placeDebugValues() { - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) { - BB->splice(RegionBegin, BB, FirstDbgValue); - RegionBegin = FirstDbgValue; - } - - for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineBasicBlock::iterator OrigPrevMI = P.second; - if (&*RegionBegin == DbgValue) - ++RegionBegin; - BB->splice(++OrigPrevMI, BB, DbgValue); - if (OrigPrevMI == llvm::prior(RegionEnd)) - RegionEnd = DbgValue; - } - DbgValues.clear(); - FirstDbgValue = NULL; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void ScheduleDAGMI::dumpSchedule() const { - for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { - if (SUnit *SU = getSUnit(&(*MI))) - SU->dump(this); - else - dbgs() << "Missing SUnit\n"; - } -} -#endif - //===----------------------------------------------------------------------===// // LoadClusterMutation - DAG post-processing to cluster loads. 
//===----------------------------------------------------------------------===// @@ -988,9 +1208,11 @@ class LoadClusterMutation : public ScheduleDAGMutation { unsigned Offset; LoadInfo(SUnit *su, unsigned reg, unsigned ofs) : SU(su), BaseReg(reg), Offset(ofs) {} + + bool operator<(const LoadInfo &RHS) const { + return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset); + } }; - static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, - const LoadClusterMutation::LoadInfo &RHS); const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -999,20 +1221,12 @@ public: const TargetRegisterInfo *tri) : TII(tii), TRI(tri) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; protected: void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); }; } // anonymous -bool LoadClusterMutation::LoadInfoLess( - const LoadClusterMutation::LoadInfo &LHS, - const LoadClusterMutation::LoadInfo &RHS) { - if (LHS.BaseReg != RHS.BaseReg) - return LHS.BaseReg < RHS.BaseReg; - return LHS.Offset < RHS.Offset; -} - void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG) { SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; @@ -1025,7 +1239,7 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, } if (LoadRecords.size() < 2) return; - std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); + std::sort(LoadRecords.begin(), LoadRecords.end()); unsigned ClusterLength = 1; for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { @@ -1102,7 +1316,7 @@ class MacroFusion : public ScheduleDAGMutation { public: MacroFusion(const TargetInstrInfo *tii): TII(tii) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; }; } // anonymous @@ -1151,10 +1365,10 @@ class CopyConstrain : public ScheduleDAGMutation { public: CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; protected: - void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); }; } // anonymous @@ -1177,7 +1391,7 @@ protected: /// this algorithm should handle extended blocks. An EBB is a set of /// contiguously numbered blocks such that the previous block in the EBB is /// always the single predecessor. -void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { LiveIntervals *LIS = DAG->getLIS(); MachineInstr *Copy = CopySU->getInstr(); @@ -1227,19 +1441,19 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { // Check if GlobalLI contains a hole in the vicinity of LocalLI. if (GlobalSegment != GlobalLI->begin()) { // Two address defs have no hole. - if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, GlobalSegment->start)) { return; } // If the prior global segment may be defined by the same two-address // instruction that also defines LocalLI, then can't make a hole here. - if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, LocalLI->beginIndex())) { return; } // If GlobalLI has a prior segment, it must be live into the EBB. 
Otherwise // it would be a disconnected component in the live range. - assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && + assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && "Disconnected LRG within the scheduling region."); } MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); @@ -1302,6 +1516,8 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { /// \brief Callback from DAG postProcessing to create weak edges to encourage /// copy elimination. void CopyConstrain::apply(ScheduleDAGMI *DAG) { + assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); if (FirstPos == DAG->end()) return; @@ -1314,370 +1530,53 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { if (!SU->getInstr()->isCopy()) continue; - constrainLocalCopy(SU, DAG); + constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); } } //===----------------------------------------------------------------------===// -// GenericScheduler - Implementation of the generic MachineSchedStrategy. +// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler +// and possibly other custom schedulers. //===----------------------------------------------------------------------===// -namespace { -/// GenericScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class GenericScheduler : public MachineSchedStrategy { -public: - /// Represent the type of SchedCandidate found within a single queue. - /// pickNodeBidirectional depends on these listed by decreasing priority. - enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, - ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; - -#ifndef NDEBUG - static const char *getReasonStr(GenericScheduler::CandReason Reason); -#endif - - /// Policy for scheduling the next instruction in the candidate's zone. - struct CandPolicy { - bool ReduceLatency; - unsigned ReduceResIdx; - unsigned DemandResIdx; - - CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} - }; - - /// Status of an instruction's critical resource consumption. - struct SchedResourceDelta { - // Count critical resources in the scheduled region required by SU. - unsigned CritResources; - - // Count critical resources from another region consumed by SU. - unsigned DemandedResources; - - SchedResourceDelta(): CritResources(0), DemandedResources(0) {} - - bool operator==(const SchedResourceDelta &RHS) const { - return CritResources == RHS.CritResources - && DemandedResources == RHS.DemandedResources; - } - bool operator!=(const SchedResourceDelta &RHS) const { - return !operator==(RHS); - } - }; - - /// Store the state used by GenericScheduler heuristics, required for the - /// lifetime of one invocation of pickNode(). - struct SchedCandidate { - CandPolicy Policy; - - // The best SUnit candidate. - SUnit *SU; - - // The reason for this candidate. - CandReason Reason; - - // Set of reasons that apply to multiple candidates. - uint32_t RepeatReasonSet; - - // Register pressure values for the best candidate. - RegPressureDelta RPDelta; - - // Critical resource consumption of the best candidate. 
- SchedResourceDelta ResDelta; - - SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} +static const unsigned InvalidCycle = ~0U; - bool isValid() const { return SU; } - - // Copy the status of another candidate without changing policy. - void setBest(SchedCandidate &Best) { - assert(Best.Reason != NoCand && "uninitialized Sched candidate"); - SU = Best.SU; - Reason = Best.Reason; - RPDelta = Best.RPDelta; - ResDelta = Best.ResDelta; - } - - bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } - void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } - - void initResourceDelta(const ScheduleDAGMI *DAG, - const TargetSchedModel *SchedModel); - }; - - /// Summarize the unscheduled region. - struct SchedRemainder { - // Critical path through the DAG in expected latency. - unsigned CriticalPath; - unsigned CyclicCritPath; - - // Scaled count of micro-ops left to schedule. - unsigned RemIssueCount; - - bool IsAcyclicLatencyLimited; - - // Unscheduled resources - SmallVector<unsigned, 16> RemainingCounts; - - void reset() { - CriticalPath = 0; - CyclicCritPath = 0; - RemIssueCount = 0; - IsAcyclicLatencyLimited = false; - RemainingCounts.clear(); - } - - SchedRemainder() { reset(); } - - void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - }; - - /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in the direction of movement, and maintains the state - /// of "hazards" and other interlocks at the current cycle. - struct SchedBoundary { - ScheduleDAGMI *DAG; - const TargetSchedModel *SchedModel; - SchedRemainder *Rem; - - ReadyQueue Available; - ReadyQueue Pending; - bool CheckPending; - - // For heuristics, keep a list of the nodes that immediately depend on the - // most recently scheduled node. - SmallPtrSet<const SUnit*, 8> NextSUs; - - ScheduleHazardRecognizer *HazardRec; - - /// Number of cycles it takes to issue the instructions scheduled in this - /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. - /// See getStalls(). - unsigned CurrCycle; - - /// Micro-ops issued in the current cycle - unsigned CurrMOps; - - /// MinReadyCycle - Cycle of the soonest available instruction. - unsigned MinReadyCycle; - - // The expected latency of the critical path in this scheduled zone. - unsigned ExpectedLatency; - - // The latency of dependence chains leading into this zone. - // For each node scheduled bottom-up: DLat = max DLat, N.Depth. - // For each cycle scheduled: DLat -= 1. - unsigned DependentLatency; - - /// Count the scheduled (issued) micro-ops that can be retired by - /// time=CurrCycle assuming the first scheduled instr is retired at time=0. - unsigned RetiredMOps; - - // Count scheduled resources that have been executed. Resources are - // considered executed if they become ready in the time that it takes to - // saturate any resource including the one in question. Counts are scaled - // for direct comparison with other resources. Counts can be compared with - // MOps * getMicroOpFactor and Latency * getLatencyFactor. - SmallVector<unsigned, 16> ExecutedResCounts; - - /// Cache the max count for a single resource. - unsigned MaxExecutedResCount; - - // Cache the critical resources ID in this scheduled zone. - unsigned ZoneCritResIdx; - - // Is the scheduled region resource limited vs. latency limited. 
- bool IsResourceLimited; - -#ifndef NDEBUG - // Remember the greatest operand latency as an upper bound on the number of - // times we should retry the pending queue because of a hazard. - unsigned MaxObservedLatency; -#endif - - void reset() { - // A new HazardRec is created for each DAG and owned by SchedBoundary. - // Destroying and reconstructing it is very expensive though. So keep - // invalid, placeholder HazardRecs. - if (HazardRec && HazardRec->isEnabled()) { - delete HazardRec; - HazardRec = 0; - } - Available.clear(); - Pending.clear(); - CheckPending = false; - NextSUs.clear(); - CurrCycle = 0; - CurrMOps = 0; - MinReadyCycle = UINT_MAX; - ExpectedLatency = 0; - DependentLatency = 0; - RetiredMOps = 0; - MaxExecutedResCount = 0; - ZoneCritResIdx = 0; - IsResourceLimited = false; -#ifndef NDEBUG - MaxObservedLatency = 0; -#endif - // Reserve a zero-count for invalid CritResIdx. - ExecutedResCounts.resize(1); - assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); - } - - /// Pending queues extend the ready queues with the same ID and the - /// PendingFlag set. - SchedBoundary(unsigned ID, const Twine &Name): - DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), - HazardRec(0) { - reset(); - } - - ~SchedBoundary() { delete HazardRec; } - - void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, - SchedRemainder *rem); - - bool isTop() const { - return Available.getID() == GenericScheduler::TopQID; - } - -#ifndef NDEBUG - const char *getResourceName(unsigned PIdx) { - if (!PIdx) - return "MOps"; - return SchedModel->getProcResource(PIdx)->Name; - } -#endif - - /// Get the number of latency cycles "covered" by the scheduled - /// instructions. This is the larger of the critical path within the zone - /// and the number of cycles required to issue the instructions. - unsigned getScheduledLatency() const { - return std::max(ExpectedLatency, CurrCycle); - } - - unsigned getUnscheduledLatency(SUnit *SU) const { - return isTop() ? SU->getHeight() : SU->getDepth(); - } - - unsigned getResourceCount(unsigned ResIdx) const { - return ExecutedResCounts[ResIdx]; - } - - /// Get the scaled count of scheduled micro-ops and resources, including - /// executed resources. - unsigned getCriticalCount() const { - if (!ZoneCritResIdx) - return RetiredMOps * SchedModel->getMicroOpFactor(); - return getResourceCount(ZoneCritResIdx); - } - - /// Get a scaled count for the minimum execution time of the scheduled - /// micro-ops that are ready to execute by getExecutedCount. Notice the - /// feedback loop. 
- unsigned getExecutedCount() const { - return std::max(CurrCycle * SchedModel->getLatencyFactor(), - MaxExecutedResCount); - } - - bool checkHazard(SUnit *SU); - - unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); - - unsigned getOtherResourceCount(unsigned &OtherCritIdx); - - void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); - - void releaseNode(SUnit *SU, unsigned ReadyCycle); - - void bumpCycle(unsigned NextCycle); - - void incExecutedResources(unsigned PIdx, unsigned Count); - - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); - - void bumpNode(SUnit *SU); - - void releasePending(); - - void removeReady(SUnit *SU); - - SUnit *pickOnlyChoice(); - -#ifndef NDEBUG - void dumpScheduledState(); -#endif - }; - -private: - const MachineSchedContext *Context; - ScheduleDAGMI *DAG; - const TargetSchedModel *SchedModel; - const TargetRegisterInfo *TRI; - - // State of the top and bottom scheduled instruction boundaries. - SchedRemainder Rem; - SchedBoundary Top; - SchedBoundary Bot; - - MachineSchedPolicy RegionPolicy; -public: - /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) - enum { - TopQID = 1, - BotQID = 2, - LogMaxQID = 2 - }; - - GenericScheduler(const MachineSchedContext *C): - Context(C), DAG(0), SchedModel(0), TRI(0), - Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} - - virtual void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs); - - bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } - - virtual void initialize(ScheduleDAGMI *dag); - - virtual SUnit *pickNode(bool &IsTopNode); - - virtual void schedNode(SUnit *SU, bool IsTopNode); - - virtual void releaseTopNode(SUnit *SU); - - virtual void releaseBottomNode(SUnit *SU); - - virtual void registerRoots(); - -protected: - void checkAcyclicLatency(); - - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker); - - SUnit *pickNodeBidirectional(bool &IsTopNode); - - void pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); - - void reschedulePhysRegCopies(SUnit *SU, bool isTop); +SchedBoundary::~SchedBoundary() { delete HazardRec; } +void SchedBoundary::reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = 0; + } + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + CurrCycle = 0; + CurrMOps = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; + IsResourceLimited = false; + ReservedCycles.clear(); #ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); + // Track the maximum number of stall cycles that could arise either from the + // latency of a DAG edge or the number of cycles that a processor resource is + // reserved (SchedBoundary::ReservedCycles). + MaxObservedLatency = 0; #endif -}; -} // namespace + // Reserve a zero-count for invalid CritResIdx. 
+ ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); +} -void GenericScheduler::SchedRemainder:: +void SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); if (!SchedModel->hasInstrSchedModel()) @@ -1698,175 +1597,47 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { } } -void GenericScheduler::SchedBoundary:: +void SchedBoundary:: init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { reset(); DAG = dag; SchedModel = smodel; Rem = rem; - if (SchedModel->hasInstrSchedModel()) + if (SchedModel->hasInstrSchedModel()) { ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); -} - -/// Initialize the per-region scheduling policy. -void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) { - const TargetMachine &TM = Context->MF->getTarget(); - - // Avoid setting up the register pressure tracker for small regions to save - // compile time. As a rough heuristic, only track pressure when the number of - // schedulable instructions exceeds half the integer register file. - unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( - TM.getTargetLowering()->getRegClassFor(MVT::i32)); - - RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); - - // For generic targets, we default to bottom-up, because it's simpler and more - // compile-time optimizations have been implemented in that direction. - RegionPolicy.OnlyBottomUp = true; - - // Allow the subtarget to override default policy. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); - - // After subtarget overrides, apply command line options. - if (!EnableRegPressure) - RegionPolicy.ShouldTrackPressure = false; - - // Check -misched-topdown/bottomup can force or unforce scheduling direction. - // e.g. -misched-bottomup=false allows scheduling in both directions. - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - if (ForceBottomUp.getNumOccurrences() > 0) { - RegionPolicy.OnlyBottomUp = ForceBottomUp; - if (RegionPolicy.OnlyBottomUp) - RegionPolicy.OnlyTopDown = false; - } - if (ForceTopDown.getNumOccurrences() > 0) { - RegionPolicy.OnlyTopDown = ForceTopDown; - if (RegionPolicy.OnlyTopDown) - RegionPolicy.OnlyBottomUp = false; - } -} - -void GenericScheduler::initialize(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = DAG->getSchedModel(); - TRI = DAG->TRI; - - Rem.init(DAG, SchedModel); - Top.init(DAG, SchedModel, &Rem); - Bot.init(DAG, SchedModel, &Rem); - - // Initialize resource counts. - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or - // are disabled, then these HazardRecs will be disabled. 
- const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); - if (!Top.HazardRec) { - Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } - if (!Bot.HazardRec) { - Bot.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } -} - -void GenericScheduler::releaseTopNode(SUnit *SU) { - if (SU->isScheduled) - return; - - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; + ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); } - Top.releaseNode(SU, SU->TopReadyCycle); } -void GenericScheduler::releaseBottomNode(SUnit *SU) { - if (SU->isScheduled) - return; - - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; - } - Bot.releaseNode(SU, SU->BotReadyCycle); -} - -/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic -/// critical path by more cycles than it takes to drain the instruction buffer. -/// We estimate an upper bounds on in-flight instructions as: -/// -/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) -/// InFlightIterations = AcyclicPath / CyclesPerIteration -/// InFlightResources = InFlightIterations * LoopResources -/// -/// TODO: Check execution resources in addition to IssueCount. -void GenericScheduler::checkAcyclicLatency() { - if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) - return; - - // Scaled number of cycles per loop iteration. - unsigned IterCount = - std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), - Rem.RemIssueCount); - // Scaled acyclic critical path. - unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); - // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop - unsigned InFlightCount = - (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; - unsigned BufferLimit = - SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); - - Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; +/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat +/// these "soft stalls" differently than the hard stall cycles based on CPU +/// resources and computed by checkHazard(). A fully in-order model +/// (MicroOpBufferSize==0) will not make use of this since instructions are not +/// available for scheduling until they are ready. However, a weaker in-order +/// model may use this for heuristics. For example, if a processor has in-order +/// behavior when reading certain resources, this may come into play. 
+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { + if (!SU->isUnbuffered) + return 0; - DEBUG(dbgs() << "IssueCycles=" - << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " - << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() - << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount - << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() - << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; - if (Rem.IsAcyclicLatencyLimited) - dbgs() << " ACYCLIC LATENCY LIMIT\n"); + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + if (ReadyCycle > CurrCycle) + return ReadyCycle - CurrCycle; + return 0; } -void GenericScheduler::registerRoots() { - Rem.CriticalPath = DAG->ExitSU.getDepth(); - - // Some roots may not feed into ExitSU. Check all of them in case. - for (std::vector<SUnit*>::const_iterator - I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { - if ((*I)->getDepth() > Rem.CriticalPath) - Rem.CriticalPath = (*I)->getDepth(); - } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); - - if (EnableCyclicPath) { - Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); - checkAcyclicLatency(); - } +/// Compute the next cycle at which the given processor resource can be +/// scheduled. +unsigned SchedBoundary:: +getNextResourceCycle(unsigned PIdx, unsigned Cycles) { + unsigned NextUnreserved = ReservedCycles[PIdx]; + // If this resource has never been used, always return cycle zero. + if (NextUnreserved == InvalidCycle) + return 0; + // For bottom-up scheduling add the cycles needed for the current operation. + if (!isTop()) + NextUnreserved += Cycles; + return NextUnreserved; } /// Does this SU have a hazard within the current instruction group. @@ -1882,21 +1653,31 @@ void GenericScheduler::registerRoots() { /// can dispatch per cycle. /// /// TODO: Also check whether the SU must start a new group. -bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { - if (HazardRec->isEnabled()) - return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; - +bool SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled() + && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { + return true; + } unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; } + if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle) + return true; + } + } return false; } // Find the unscheduled node in ReadySUs with the highest latency. -unsigned GenericScheduler::SchedBoundary:: +unsigned SchedBoundary:: findMaxLatency(ArrayRef<SUnit*> ReadySUs) { SUnit *LateSU = 0; unsigned RemLatency = 0; @@ -1918,7 +1699,7 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) { // Count resources in this zone and the remaining unscheduled // instruction. Return the max count, scaled. Set OtherCritIdx to the critical // resource index, or zero if the zone is issue limited. 
-unsigned GenericScheduler::SchedBoundary:: +unsigned SchedBoundary:: getOtherResourceCount(unsigned &OtherCritIdx) { OtherCritIdx = 0; if (!SchedModel->hasInstrSchedModel()) @@ -1939,74 +1720,12 @@ getOtherResourceCount(unsigned &OtherCritIdx) { if (OtherCritIdx) { DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) - << " " << getResourceName(OtherCritIdx) << "\n"); + << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); } return OtherCritCount; } -/// Set the CandPolicy for this zone given the current resources and latencies -/// inside and outside the zone. -void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, - SchedBoundary &OtherZone) { - // Now that potential stalls have been considered, apply preemptive heuristics - // based on the the total latency and resources inside and outside this - // zone. - - // Compute remaining latency. We need this both to determine whether the - // overall schedule has become latency-limited and whether the instructions - // outside this zone are resource or latency limited. - // - // The "dependent" latency is updated incrementally during scheduling as the - // max height/depth of scheduled nodes minus the cycles since it was - // scheduled: - // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone - // - // The "independent" latency is the max ready queue depth: - // ILat = max N.depth for N in Available|Pending - // - // RemainingLatency is the greater of independent and dependent latency. - unsigned RemLatency = DependentLatency; - RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); - RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); - - // Compute the critical resource outside the zone. - unsigned OtherCritIdx; - unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); - - bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) { - unsigned LFactor = SchedModel->getLatencyFactor(); - OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; - } - if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { - Policy.ReduceLatency |= true; - DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " - << RemLatency << " + " << CurrCycle << "c > CritPath " - << Rem->CriticalPath << "\n"); - } - // If the same resource is limiting inside and outside the zone, do nothing. 
- if (ZoneCritResIdx == OtherCritIdx) - return; - - DEBUG( - if (IsResourceLimited) { - dbgs() << " " << Available.getName() << " ResourceLimited: " - << getResourceName(ZoneCritResIdx) << "\n"; - } - if (OtherResLimited) - dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; - if (!IsResourceLimited && !OtherResLimited) - dbgs() << " Latency limited both directions.\n"); - - if (IsResourceLimited && !Policy.ReduceResIdx) - Policy.ReduceResIdx = ZoneCritResIdx; - - if (OtherResLimited) - Policy.DemandResIdx = OtherCritIdx; -} - -void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, - unsigned ReadyCycle) { +void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -2022,8 +1741,48 @@ void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, NextSUs.insert(SU); } +void SchedBoundary::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isWeak()) + continue; + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned Latency = I->getLatency(); +#ifndef NDEBUG + MaxObservedLatency = std::max(Latency, MaxObservedLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; + } + releaseNode(SU, SU->TopReadyCycle); +} + +void SchedBoundary::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isWeak()) + continue; + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned Latency = I->getLatency(); +#ifndef NDEBUG + MaxObservedLatency = std::max(Latency, MaxObservedLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; + } + releaseNode(SU, SU->BotReadyCycle); +} + /// Move the boundary of scheduled code by one cycle. -void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { +void SchedBoundary::bumpCycle(unsigned NextCycle) { if (SchedModel->getMicroOpBufferSize() == 0) { assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); if (MinReadyCycle > NextCycle) @@ -2061,8 +1820,7 @@ void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); } -void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, - unsigned Count) { +void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { ExecutedResCounts[PIdx] += Count; if (ExecutedResCounts[PIdx] > MaxExecutedResCount) MaxExecutedResCount = ExecutedResCounts[PIdx]; @@ -2075,11 +1833,11 @@ void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned GenericScheduler::SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { +unsigned SchedBoundary:: +countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; - DEBUG(dbgs() << " " << getResourceName(PIdx) + DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" << Cycles << "x" << Factor << "u\n"); // Update Executed resources counts. 
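
The releaseTopNode/releaseBottomNode hunks above compute a node's ready cycle as the maximum, over its strong DAG edges, of the neighbor's ready cycle plus the edge latency. A minimal standalone sketch of that propagation, using hypothetical Edge/computeReadyCycle names rather than the real SDep/SUnit API:

// Sketch only: mirrors the ReadyCycle = max(OtherReadyCycle + Latency) loop
// in releaseTopNode/releaseBottomNode; types and names are illustrative.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Edge {
  unsigned NeighborReadyCycle; // TopReadyCycle of a pred (or BotReadyCycle of a succ)
  unsigned Latency;            // edge latency in cycles
  bool Weak;                   // weak edges do not delay readiness
};

unsigned computeReadyCycle(const std::vector<Edge> &Edges) {
  unsigned ReadyCycle = 0;
  for (const Edge &E : Edges) {
    if (E.Weak)
      continue;
    ReadyCycle = std::max(ReadyCycle, E.NeighborReadyCycle + E.Latency);
  }
  return ReadyCycle; // earliest cycle at which the node can be released
}

int main() {
  std::vector<Edge> Preds = {{2, 3, false}, {4, 1, false}, {9, 5, true}};
  std::printf("ready @%uc\n", computeReadyCycle(Preds)); // prints: ready @5c
  return 0;
}
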
@@ -2092,15 +1850,21 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { ZoneCritResIdx = PIdx; DEBUG(dbgs() << " *** Critical resource " - << getResourceName(PIdx) << ": " + << SchedModel->getResourceName(PIdx) << ": " << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); } - // TODO: We don't yet model reserved resources. It's not hard though. - return CurrCycle; + // For reserved resources, record the highest cycle using the resource. + unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); + if (NextAvailable > CurrCycle) { + DEBUG(dbgs() << " Resource conflict: " + << SchedModel->getProcResource(PIdx)->Name << " reserved until @" + << NextAvailable << "\n"); + } + return NextAvailable; } /// Move the boundary of scheduled code by one SUnit. -void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { +void SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. if (HazardRec->isEnabled()) { if (!isTop() && SU->isCall) { @@ -2110,25 +1874,18 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + // checkHazard should prevent scheduling multiple instructions per cycle that + // exceed the issue width. const MCSchedClassDesc *SC = DAG->getSchedClass(SU); unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); - CurrMOps += IncMOps; - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. - // - // TODO: Also check if this SU must end a dispatch group. - unsigned NextCycle = CurrCycle; - if (CurrMOps >= SchedModel->getIssueWidth()) { - ++NextCycle; - DEBUG(dbgs() << " *** Max MOps " << CurrMOps - << " at cycle " << CurrCycle << '\n'); - } + assert( + (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && + "Cannot schedule this instruction's MicroOps in the current cycle."); + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); @@ -2141,7 +1898,11 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { break; default: // We don't currently model the OOO reorder buffer, so consider all - // scheduled MOps to be "retired". + // scheduled MOps to be "retired". We do loosely model in-order resource + // latency. If this instruction uses an in-order resource, account for any + // likely stall cycles. + if (SU->isUnbuffered && ReadyCycle > NextCycle) + NextCycle = ReadyCycle; break; } RetiredMOps += IncMOps; @@ -2169,10 +1930,27 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); if (RCycle > NextCycle) NextCycle = RCycle; } + if (SU->hasReservedResource) { + // For reserved resources, record the highest cycle using the resource. 
+ // For top-down scheduling, this is the cycle in which we schedule this + // instruction plus the number of cycles the operations reserves the + // resource. For bottom-up is it simply the instruction's cycle. + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle; +#ifndef NDEBUG + MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency); +#endif + } + } + } } // Update ExpectedLatency and DependentLatency. unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; @@ -2193,18 +1971,28 @@ void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { } else { // After updating ZoneCritResIdx and ExpectedLatency, check if we're - // resource limited. If a stall occured, bumpCycle does this. + // resource limited. If a stall occurred, bumpCycle does this. unsigned LFactor = SchedModel->getLatencyFactor(); IsResourceLimited = (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) > (int)LFactor; } + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle + // resets CurrMOps. Loop to handle instructions with more MOps than issue in + // one cycle. Since we commonly reach the max MOps here, opportunistically + // bump the cycle to avoid uselessly checking everything in the readyQ. + CurrMOps += IncMOps; + while (CurrMOps >= SchedModel->getIssueWidth()) { + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + bumpCycle(++NextCycle); + } DEBUG(dumpScheduledState()); } /// Release pending ready nodes in to the available queue. This makes them /// visible to heuristics. -void GenericScheduler::SchedBoundary::releasePending() { +void SchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) MinReadyCycle = UINT_MAX; @@ -2234,7 +2022,7 @@ void GenericScheduler::SchedBoundary::releasePending() { } /// Remove SU from the ready set for this boundary. -void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { +void SchedBoundary::removeReady(SUnit *SU) { if (Available.isInQueue(SU)) Available.remove(Available.find(SU)); else { @@ -2246,7 +2034,7 @@ void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { /// If this queue only has one ready candidate, return it. As a side effect, /// defer any nodes that now hit a hazard, and advance the cycle until at least /// one node is ready. If multiple instructions are ready, return NULL. -SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { +SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); @@ -2275,7 +2063,7 @@ SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { #ifndef NDEBUG // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. 
-void GenericScheduler::SchedBoundary::dumpScheduledState() { +void SchedBoundary::dumpScheduledState() { unsigned ResFactor; unsigned ResCount; if (ZoneCritResIdx) { @@ -2291,14 +2079,124 @@ void GenericScheduler::SchedBoundary::dumpScheduledState() { << " Retired: " << RetiredMOps; dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; dbgs() << "\n Critical: " << ResCount / LFactor << "c, " - << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) + << ResCount / ResFactor << " " + << SchedModel->getResourceName(ZoneCritResIdx) << "\n ExpectedLatency: " << ExpectedLatency << "c\n" << (IsResourceLimited ? " - Resource" : " - Latency") << " limited.\n"; } #endif -void GenericScheduler::SchedCandidate:: +//===----------------------------------------------------------------------===// +// GenericScheduler - Generic implementation of MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// Base class for GenericScheduler. This class maintains information about +/// scheduling candidates based on TargetSchedModel making it easy to implement +/// heuristics for either preRA or postRA scheduling. +class GenericSchedulerBase : public MachineSchedStrategy { +public: + /// Represent the type of SchedCandidate found within a single queue. + /// pickNodeBidirectional depends on these listed by decreasing priority. + enum CandReason { + NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, + ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, + TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; + +#ifndef NDEBUG + static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); +#endif + + /// Policy for scheduling the next instruction in the candidate's zone. + struct CandPolicy { + bool ReduceLatency; + unsigned ReduceResIdx; + unsigned DemandResIdx; + + CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} + }; + + /// Status of an instruction's critical resource consumption. + struct SchedResourceDelta { + // Count critical resources in the scheduled region required by SU. + unsigned CritResources; + + // Count critical resources from another region consumed by SU. + unsigned DemandedResources; + + SchedResourceDelta(): CritResources(0), DemandedResources(0) {} + + bool operator==(const SchedResourceDelta &RHS) const { + return CritResources == RHS.CritResources + && DemandedResources == RHS.DemandedResources; + } + bool operator!=(const SchedResourceDelta &RHS) const { + return !operator==(RHS); + } + }; + + /// Store the state used by GenericScheduler heuristics, required for the + /// lifetime of one invocation of pickNode(). + struct SchedCandidate { + CandPolicy Policy; + + // The best SUnit candidate. + SUnit *SU; + + // The reason for this candidate. + CandReason Reason; + + // Set of reasons that apply to multiple candidates. + uint32_t RepeatReasonSet; + + // Register pressure values for the best candidate. + RegPressureDelta RPDelta; + + // Critical resource consumption of the best candidate. + SchedResourceDelta ResDelta; + + SchedCandidate(const CandPolicy &policy) + : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} + + bool isValid() const { return SU; } + + // Copy the status of another candidate without changing policy. 
+ void setBest(SchedCandidate &Best) { + assert(Best.Reason != NoCand && "uninitialized Sched candidate"); + SU = Best.SU; + Reason = Best.Reason; + RPDelta = Best.RPDelta; + ResDelta = Best.ResDelta; + } + + bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } + void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } + + void initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel); + }; + +protected: + const MachineSchedContext *Context; + const TargetSchedModel *SchedModel; + const TargetRegisterInfo *TRI; + + SchedRemainder Rem; +protected: + GenericSchedulerBase(const MachineSchedContext *C): + Context(C), SchedModel(0), TRI(0) {} + + void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, + SchedBoundary *OtherZone); + +#ifndef NDEBUG + void traceCandidate(const SchedCandidate &Cand); +#endif +}; +} // namespace + +void GenericSchedulerBase::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!Policy.ReduceResIdx && !Policy.DemandResIdx) @@ -2315,12 +2213,162 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } +/// Set the CandPolicy given a scheduling zone given the current resources and +/// latencies inside and outside the zone. +void GenericSchedulerBase::setPolicy(CandPolicy &Policy, + bool IsPostRA, + SchedBoundary &CurrZone, + SchedBoundary *OtherZone) { + // Apply preemptive heuristics based on the the total latency and resources + // inside and outside this zone. Potential stalls should be considered before + // following this policy. + + // Compute remaining latency. We need this both to determine whether the + // overall schedule has become latency-limited and whether the instructions + // outside this zone are resource or latency limited. + // + // The "dependent" latency is updated incrementally during scheduling as the + // max height/depth of scheduled nodes minus the cycles since it was + // scheduled: + // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone + // + // The "independent" latency is the max ready queue depth: + // ILat = max N.depth for N in Available|Pending + // + // RemainingLatency is the greater of independent and dependent latency. + unsigned RemLatency = CurrZone.getDependentLatency(); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Available.elements())); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Pending.elements())); + + // Compute the critical resource outside the zone. + unsigned OtherCritIdx = 0; + unsigned OtherCount = + OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; + } + // Schedule aggressively for latency in PostRA mode. We don't check for + // acyclic latency during PostRA, and highly out-of-order processors will + // skip PostRA scheduling. + if (!OtherResLimited) { + if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << CurrZone.Available.getName() + << " RemainingLatency " << RemLatency << " + " + << CurrZone.getCurrCycle() << "c > CritPath " + << Rem.CriticalPath << "\n"); + } + } + // If the same resource is limiting inside and outside the zone, do nothing. 
+ if (CurrZone.getZoneCritResIdx() == OtherCritIdx) + return; + + DEBUG( + if (CurrZone.isResourceLimited()) { + dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " + << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) + << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " + << SchedModel->getResourceName(OtherCritIdx) << "\n"; + if (!CurrZone.isResourceLimited() && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) + Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +#ifndef NDEBUG +const char *GenericSchedulerBase::getReasonStr( + GenericSchedulerBase::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; + case Stall: return "STALL "; + case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; + case RegMax: return "REG-MAX "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case RegExcess: + P = Cand.RPDelta.Excess; + break; + case RegCritical: + P = Cand.RPDelta.CriticalMax; + break; + case RegMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + if (P.isValid()) + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << " " << Latency << " cycles "; + else + dbgs() << " "; + dbgs() << '\n'; +} +#endif /// Return true if this heuristic determines order. 
static bool tryLess(int TryVal, int CandVal, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -2335,9 +2383,9 @@ static bool tryLess(int TryVal, int CandVal, } static bool tryGreater(int TryVal, int CandVal, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -2351,11 +2399,231 @@ static bool tryGreater(int TryVal, int CandVal, return false; } +static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::BotPathReduce)) + return true; + } + return false; +} + +static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") + << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); +} + +namespace { +/// GenericScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class GenericScheduler : public GenericSchedulerBase { + ScheduleDAGMILive *DAG; + + // State of the top and bottom scheduled instruction boundaries. 
+ SchedBoundary Top; + SchedBoundary Bot; + + MachineSchedPolicy RegionPolicy; +public: + GenericScheduler(const MachineSchedContext *C): + GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"), + Bot(SchedBoundary::BotQID, "BotQ") {} + + void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override; + + bool shouldTrackPressure() const override { + return RegionPolicy.ShouldTrackPressure; + } + + void initialize(ScheduleDAGMI *dag) override; + + SUnit *pickNode(bool &IsTopNode) override; + + void schedNode(SUnit *SU, bool IsTopNode) override; + + void releaseTopNode(SUnit *SU) override { + Top.releaseTopNode(SU); + } + + void releaseBottomNode(SUnit *SU) override { + Bot.releaseBottomNode(SU); + } + + void registerRoots() override; + +protected: + void checkAcyclicLatency(); + + void tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker); + + SUnit *pickNodeBidirectional(bool &IsTopNode); + + void pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Candidate); + + void reschedulePhysRegCopies(SUnit *SU, bool isTop); +}; +} // namespace + +void GenericScheduler::initialize(ScheduleDAGMI *dag) { + assert(dag->hasVRegLiveness() && + "(PreRA)GenericScheduler needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } +} + +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const TargetMachine &TM = Context->MF->getTarget(); + const TargetLowering *TLI = TM.getTargetLowering(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. + RegionPolicy.ShouldTrackPressure = true; + for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { + MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; + if (TLI->isTypeLegal(LegalIntVT)) { + unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( + TLI->getRegClassFor(LegalIntVT)); + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + } + } + + // For generic targets, we default to bottom-up, because it's simpler and more + // compile-time optimizations have been implemented in that direction. + RegionPolicy.OnlyBottomUp = true; + + // Allow the subtarget to override default policy. + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + + // After subtarget overrides, apply command line options. 
+ if (!EnableRegPressure) + RegionPolicy.ShouldTrackPressure = false; + + // Check -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + if (ForceBottomUp.getNumOccurrences() > 0) { + RegionPolicy.OnlyBottomUp = ForceBottomUp; + if (RegionPolicy.OnlyBottomUp) + RegionPolicy.OnlyTopDown = false; + } + if (ForceTopDown.getNumOccurrences() > 0) { + RegionPolicy.OnlyTopDown = ForceTopDown; + if (RegionPolicy.OnlyTopDown) + RegionPolicy.OnlyBottomUp = false; + } +} + +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic +/// critical path by more cycles than it takes to drain the instruction buffer. +/// We estimate an upper bounds on in-flight instructions as: +/// +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) +/// InFlightIterations = AcyclicPath / CyclesPerIteration +/// InFlightResources = InFlightIterations * LoopResources +/// +/// TODO: Check execution resources in addition to IssueCount. +void GenericScheduler::checkAcyclicLatency() { + if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) + return; + + // Scaled number of cycles per loop iteration. + unsigned IterCount = + std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), + Rem.RemIssueCount); + // Scaled acyclic critical path. + unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); + // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop + unsigned InFlightCount = + (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; + unsigned BufferLimit = + SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); + + Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; + + DEBUG(dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) + dbgs() << " ACYCLIC LATENCY LIMIT\n"); +} + +void GenericScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + + // Some roots may not feed into ExitSU. Check all of them in case. + for (std::vector<SUnit*>::const_iterator + I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } +} + static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { int TryRank = TryP.getPSetOrMax(); int CandRank = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. 
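
checkAcyclicLatency above decides whether a single-block loop is acyclic-latency limited by comparing a scaled in-flight micro-op estimate against the micro-op buffer. A hedged numeric sketch of that arithmetic; all values (latency factor, buffer size, counts) are invented for illustration and do not come from any real subtarget:

// Sketch only: reproduces the rounding-up in-flight estimate from
// checkAcyclicLatency with made-up numbers.
#include <algorithm>
#include <cstdio>

int main() {
  unsigned CyclicCritPath = 4, CriticalPath = 20; // cycles
  unsigned LatencyFactor = 1, MicroOpFactor = 1;  // scaling factors
  unsigned RemIssueCount = 8;                     // scaled micro-ops left in the loop
  unsigned MicroOpBufferSize = 16;

  unsigned IterCount = std::max(CyclicCritPath * LatencyFactor, RemIssueCount);
  unsigned AcyclicCount = CriticalPath * LatencyFactor;
  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop, rounded up.
  unsigned InFlightCount =
      (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount;
  unsigned BufferLimit = MicroOpBufferSize * MicroOpFactor;

  std::printf("InFlight=%u BufferLim=%u limited=%d\n", InFlightCount,
              BufferLimit, InFlightCount > BufferLimit); // limited=1 here
  return 0;
}
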
@@ -2407,32 +2675,6 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return 0; } -static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::SchedBoundary &Zone) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericScheduler::TopDepthReduce)) - return true; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericScheduler::TopPathReduce)) - return true; - } - else { - if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericScheduler::BotHeightReduce)) - return true; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericScheduler::BotPathReduce)) - return true; - } - return false; -} - /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -2445,10 +2687,10 @@ static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, /// \param RPTracker describes reg pressure within the scheduled zone. /// \param TempTracker is a scratch pressure tracker to reuse in queries. void GenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker) { + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker) { if (DAG->isTrackingPressure()) { // Always initialize TryCand's RPDelta. @@ -2510,10 +2752,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // For loops that are acyclic path limited, aggressively schedule for latency. // This can result in very long dependence chains scheduled in sequence, so // once every cycle (when CurrMOps == 0), switch to normal heuristics. - if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() && tryLatency(TryCand, Cand, Zone)) return; + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), + Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. // @@ -2558,7 +2805,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Prefer immediate defs/users of the last scheduled instruction. This is a // local pressure avoidance strategy that also makes the machine code // readable. 
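The GenericScheduler::tryCandidate() hunk that follows applies its heuristics hierarchically: the first criterion that separates the two candidates decides, everything below it is never evaluated, and the winning reason is recorded for debug traces. Below is a generic sketch of that cascade pattern; the Candidate fields and criteria are invented for illustration and are not the scheduler's real ones.

    #include <cstdio>
    #include <vector>

    enum Reason { NoCand, LessStalls, FewerRegs, NodeOrder };

    struct Candidate {
      int Id;
      int Stalls;
      int RegPressure;
      Reason Why;
      Candidate() : Id(-1), Stalls(0), RegPressure(0), Why(NoCand) {}
      Candidate(int id, int s, int rp) : Id(id), Stalls(s), RegPressure(rp), Why(NoCand) {}
      bool isValid() const { return Id >= 0; }
    };

    // "Smaller wins": decide and record a reason, or fall through on a tie.
    static bool tryLess(int TryVal, int CandVal,
                        Candidate &TryCand, Candidate &Cand, Reason R) {
      if (TryVal < CandVal) { TryCand.Why = R; return true; }
      if (TryVal > CandVal) { if (Cand.Why > R) Cand.Why = R; return true; }
      return false;
    }

    // Hierarchical heuristics: the first criterion that differs decides.
    static void tryCandidate(Candidate &Cand, Candidate &TryCand) {
      if (!Cand.isValid()) { TryCand.Why = NodeOrder; return; }
      if (tryLess(TryCand.Stalls, Cand.Stalls, TryCand, Cand, LessStalls))
        return;
      if (tryLess(TryCand.RegPressure, Cand.RegPressure, TryCand, Cand, FewerRegs))
        return;
      if (TryCand.Id < Cand.Id)
        TryCand.Why = NodeOrder;                 // fall back to original order
    }

    int main() {
      std::vector<Candidate> Ready;
      Ready.push_back(Candidate(0, 2, 5));
      Ready.push_back(Candidate(1, 2, 3));       // same stalls, lower pressure
      Candidate Best;
      for (std::vector<Candidate>::const_iterator I = Ready.begin(),
           E = Ready.end(); I != E; ++I) {
        Candidate Try = *I;
        tryCandidate(Best, Try);
        if (Try.Why != NoCand)
          Best = Try;                            // Try beat the incumbent
      }
      std::printf("picked SU(%d), reason %d\n", Best.Id, Best.Why);  // SU(1)
    }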
- if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), + if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2569,90 +2816,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } -#ifndef NDEBUG -const char *GenericScheduler::getReasonStr( - GenericScheduler::CandReason Reason) { - switch (Reason) { - case NoCand: return "NOCAND "; - case PhysRegCopy: return "PREG-COPY"; - case RegExcess: return "REG-EXCESS"; - case RegCritical: return "REG-CRIT "; - case Cluster: return "CLUSTER "; - case Weak: return "WEAK "; - case RegMax: return "REG-MAX "; - case ResourceReduce: return "RES-REDUCE"; - case ResourceDemand: return "RES-DEMAND"; - case TopDepthReduce: return "TOP-DEPTH "; - case TopPathReduce: return "TOP-PATH "; - case BotHeightReduce:return "BOT-HEIGHT"; - case BotPathReduce: return "BOT-PATH "; - case NextDefUse: return "DEF-USE "; - case NodeOrder: return "ORDER "; - }; - llvm_unreachable("Unknown reason!"); -} - -void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureChange P; - unsigned ResIdx = 0; - unsigned Latency = 0; - switch (Cand.Reason) { - default: - break; - case RegExcess: - P = Cand.RPDelta.Excess; - break; - case RegCritical: - P = Cand.RPDelta.CriticalMax; - break; - case RegMax: - P = Cand.RPDelta.CurrentMax; - break; - case ResourceReduce: - ResIdx = Cand.Policy.ReduceResIdx; - break; - case ResourceDemand: - ResIdx = Cand.Policy.DemandResIdx; - break; - case TopDepthReduce: - Latency = Cand.SU->getDepth(); - break; - case TopPathReduce: - Latency = Cand.SU->getHeight(); - break; - case BotHeightReduce: - Latency = Cand.SU->getHeight(); - break; - case BotPathReduce: - Latency = Cand.SU->getDepth(); - break; - } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); - if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) - << ":" << P.getUnitInc() << " "; - else - dbgs() << " "; - if (ResIdx) - dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; - else - dbgs() << " "; - if (Latency) - dbgs() << " " << Latency << " cycles "; - else - dbgs() << " "; - dbgs() << '\n'; -} -#endif - /// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Cand) { + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; DEBUG(Q.dump()); @@ -2675,12 +2846,6 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const GenericScheduler::SchedCandidate &Cand, - bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); -} - /// Pick the best candidate node from either the top or bottom queue. SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. 
This is most @@ -2698,8 +2863,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - Bot.setPolicy(BotCand.Policy, Top); - Top.setPolicy(TopCand.Policy, Bot); + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2809,20 +2978,21 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { } /// Update the scheduler's state after scheduling a node. This is the same node -/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update -/// it's state based on the current cycle before MachineSchedStrategy does. +/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to +/// update it's state based on the current cycle before MachineSchedStrategy +/// does. /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling /// them here. See comments in biasPhysRegCopy. void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { - SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysRegCopies(SU, true); } else { - SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); + SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) reschedulePhysRegCopies(SU, false); @@ -2831,8 +3001,8 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { - ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C)); +static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { + ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate @@ -2845,9 +3015,191 @@ static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { DAG->addMutation(new MacroFusion(DAG->TII)); return DAG; } + static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createGenericSched); + createGenericSchedLive); + +//===----------------------------------------------------------------------===// +// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// PostGenericScheduler - Interface to the scheduling algorithm used by +/// ScheduleDAGMI. +/// +/// Callbacks from ScheduleDAGMI: +/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 
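The comment above spells out the order in which the DAG driver calls into a strategy: initPolicy, initialize(DAG), registerRoots, then repeated pickNode/schedNode. The toy interface below exists only to make that call sequence concrete; none of these types or signatures are the real MachineSchedStrategy API.

    #include <cstdio>
    #include <vector>

    struct ToyStrategy {
      virtual ~ToyStrategy() {}
      virtual void initPolicy(unsigned NumRegionInstrs) = 0;
      virtual void initialize(const std::vector<int> &DAG) = 0;
      virtual void registerRoots() = 0;
      virtual int  pickNode(bool &IsTopNode) = 0;          // -1 means "done"
      virtual void schedNode(int SU, bool IsTopNode) = 0;
    };

    // Driver: mirrors the callback order documented in the comment above.
    static void scheduleRegion(ToyStrategy &S, const std::vector<int> &DAG) {
      S.initPolicy(DAG.size());
      S.initialize(DAG);
      S.registerRoots();
      bool IsTop;
      for (int SU = S.pickNode(IsTop); SU != -1; SU = S.pickNode(IsTop))
        S.schedNode(SU, IsTop);
    }

    // Trivial strategy: emit nodes bottom-up in reverse input order.
    struct ReverseStrategy : ToyStrategy {
      std::vector<int> Q;
      void initPolicy(unsigned) override {}
      void initialize(const std::vector<int> &DAG) override { Q = DAG; }
      void registerRoots() override {}
      int pickNode(bool &IsTopNode) override {
        IsTopNode = false;
        if (Q.empty()) return -1;
        int SU = Q.back();
        Q.pop_back();
        return SU;
      }
      void schedNode(int SU, bool) override { std::printf("sched SU(%d)\n", SU); }
    };

    int main() {
      std::vector<int> DAG;
      DAG.push_back(0); DAG.push_back(1); DAG.push_back(2);
      ReverseStrategy S;
      scheduleRegion(S, DAG);   // prints SU(2), SU(1), SU(0)
    }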
+class PostGenericScheduler : public GenericSchedulerBase { + ScheduleDAGMI *DAG; + SchedBoundary Top; + SmallVector<SUnit*, 8> BotRoots; +public: + PostGenericScheduler(const MachineSchedContext *C): + GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} + + virtual ~PostGenericScheduler() {} + + void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) override { + /* no configurable policy */ + }; + + /// PostRA scheduling does not track pressure. + bool shouldTrackPressure() const override { return false; } + + void initialize(ScheduleDAGMI *Dag) override { + DAG = Dag; + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + BotRoots.clear(); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, + // or are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + } + + void registerRoots() override; + + SUnit *pickNode(bool &IsTopNode) override; + + void scheduleTree(unsigned SubtreeID) override { + llvm_unreachable("PostRA scheduler does not support subtree analysis."); + } + + void schedNode(SUnit *SU, bool IsTopNode) override; + + void releaseTopNode(SUnit *SU) override { + Top.releaseTopNode(SU); + } + + // Only called for roots. + void releaseBottomNode(SUnit *SU) override { + BotRoots.push_back(SU); + } + +protected: + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); + + void pickNodeFromQueue(SchedCandidate &Cand); +}; +} // namespace + +void PostGenericScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + + // Some roots may not feed into ExitSU. Check all of them in case. + for (SmallVectorImpl<SUnit*>::const_iterator + I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); +} + +/// Apply a set of heursitics to a new candidate for PostRA scheduling. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand) { + + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Top.getLatencyStallCycles(TryCand.SU), + Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + + // Avoid critical resource consumption and balance the schedule. + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { + return; + } + + // Fall through to original instruction order. 
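Both registerRoots() implementations in this patch seed the critical path with ExitSU's depth and then raise it to the largest depth among the bottom roots, since some roots may not feed ExitSU. The standalone sketch below computes the same quantity on a toy dependence graph using memoized longest-path depths; the graph and helper are invented for illustration and are not the SUnit API.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Node i depends on every node in Preds[i]. getDepth(i) is the length of
    // the longest predecessor chain ending at i (one cycle per edge here),
    // analogous to what SUnit::getDepth() reports for real nodes.
    static unsigned getDepth(unsigned i,
                             const std::vector<std::vector<unsigned> > &Preds,
                             std::vector<int> &Memo) {
      if (Memo[i] >= 0)
        return Memo[i];
      unsigned D = 0;
      for (unsigned p = 0; p < Preds[i].size(); ++p)
        D = std::max(D, getDepth(Preds[i][p], Preds, Memo) + 1);
      return Memo[i] = D;
    }

    int main() {
      // 0 -> 1 -> 3 and 0 -> 2; nodes 2 and 3 are the "bottom roots".
      std::vector<std::vector<unsigned> > Preds(4);
      Preds[1].push_back(0);
      Preds[2].push_back(0);
      Preds[3].push_back(1);
      std::vector<int> Memo(4, -1);

      unsigned BotRoots[] = {2, 3};
      unsigned CriticalPath = 0;           // stand-in for ExitSU.getDepth()
      for (unsigned r = 0; r < 2; ++r)
        CriticalPath = std::max(CriticalPath, getDepth(BotRoots[r], Preds, Memo));
      std::printf("Critical Path: %u\n", CriticalPath);   // prints 2
    }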
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + TryCand.Reason = NodeOrder; +} + +void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { + ReadyQueue &Q = Top.Available; + + DEBUG(Q.dump()); + + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + TryCand.initResourceDelta(DAG, SchedModel); + tryCandidate(Cand, TryCand); + if (TryCand.Reason != NoCand) { + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand)); + } + } +} + +/// Pick the next node to schedule. +SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); + return NULL; + } + SUnit *SU; + do { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate TopCand(NoPolicy); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL); + pickNodeFromQueue(TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); + SU = TopCand.SU; + } + } while (SU->isScheduled); + + IsTopNode = true; + Top.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} + +/// Called after ScheduleDAGMI has scheduled an instruction and updated +/// scheduled/remaining flags in the DAG nodes. +void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); + Top.bumpNode(SU); +} + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true); +} //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. @@ -2889,22 +3241,23 @@ struct ILPOrder { /// \brief Schedule based on the ILP metric. class ILPScheduler : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + ScheduleDAGMILive *DAG; ILPOrder Cmp; std::vector<SUnit*> ReadyQ; public: ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} - virtual void initialize(ScheduleDAGMI *dag) { - DAG = dag; + void initialize(ScheduleDAGMI *dag) override { + assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); DAG->computeDFSResult(); Cmp.DFSResult = DAG->getDFSResult(); Cmp.ScheduledTrees = &DAG->getScheduledTrees(); ReadyQ.clear(); } - virtual void registerRoots() { + void registerRoots() override { // Restore the heap in ReadyQ with the updated DFS results. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -2913,7 +3266,7 @@ public: /// ----------------------------------------- /// Callback to select the highest priority node from the ready Q. - virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *pickNode(bool &IsTopNode) override { if (ReadyQ.empty()) return NULL; std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); @@ -2929,19 +3282,19 @@ public: } /// \brief Scheduler callback to notify that a new subtree is scheduled. - virtual void scheduleTree(unsigned SubtreeID) { + void scheduleTree(unsigned SubtreeID) override { std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } /// Callback after a node is scheduled. 
Mark a newly scheduled tree, notify /// DFSResults, and resort the priority Q. - virtual void schedNode(SUnit *SU, bool IsTopNode) { + void schedNode(SUnit *SU, bool IsTopNode) override { assert(!IsTopNode && "SchedDFSResult needs bottom-up"); } - virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + void releaseTopNode(SUnit *) override { /*only called for top roots*/ } - virtual void releaseBottomNode(SUnit *SU) { + void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -2949,10 +3302,10 @@ public: } // namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new ILPScheduler(true)); + return new ScheduleDAGMILive(C, new ILPScheduler(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new ILPScheduler(false)); + return new ScheduleDAGMILive(C, new ILPScheduler(false)); } static MachineSchedRegistry ILPMaxRegistry( "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); @@ -2994,7 +3347,7 @@ public: InstructionShuffler(bool alternate, bool topdown) : IsAlternating(alternate), IsTopDown(topdown) {} - virtual void initialize(ScheduleDAGMI *) { + virtual void initialize(ScheduleDAGMI*) { TopQ.clear(); BottomQ.clear(); } @@ -3041,7 +3394,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { bool TopDown = !ForceBottomUp; assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); - return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); + return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", @@ -3049,7 +3402,7 @@ static MachineSchedRegistry ShufflerRegistry( #endif // !NDEBUG //===----------------------------------------------------------------------===// -// GraphWriter support for ScheduleDAGMI. +// GraphWriter support for ScheduleDAGMILive. //===----------------------------------------------------------------------===// #ifndef NDEBUG @@ -3095,8 +3448,9 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { std::string Str; raw_string_ostream SS(Str); - const SchedDFSResult *DFS = - static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? + static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0; SS << "SU:" << SU->NodeNum; if (DFS) SS << " I:" << DFS->getNumInstrs(SU); @@ -3106,11 +3460,11 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { return G->getGraphNodeLabel(SU); } - static std::string getNodeAttributes(const SUnit *N, - const ScheduleDAG *Graph) { + static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { std::string Str("shape=Mrecord"); - const SchedDFSResult *DFS = - static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult(); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0; if (DFS) { Str += ",style=filled,fillcolor=\"#"; Str += DOT::getColorString(DFS->getSubtreeID(N)); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 105d7c2..dbff1f6 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -60,9 +60,9 @@ namespace { initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); @@ -72,7 +72,7 @@ namespace { AU.addPreserved<MachineLoopInfo>(); } - virtual void releaseMemory() { + void releaseMemory() override { CEBCandidates.clear(); } @@ -98,16 +98,6 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; - - // SuccessorSorter - Sort Successors according to their loop depth. - struct SuccessorSorter { - SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} - bool operator()(const MachineBasicBlock *LHS, - const MachineBasicBlock *RHS) const { - return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); - } - MachineLoopInfo *LI; - }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -181,13 +171,12 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // Predecessors according to CFG: BB#0 BB#1 // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1> BreakPHIEdge = true; - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); - I != E; ++I) { - MachineInstr *UseInst = &*I; + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = &MO - &UseInst->getOperand(0); MachineBasicBlock *UseBlock = UseInst->getParent(); if (!(UseBlock == MBB && UseInst->isPHI() && - UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) { + UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) { BreakPHIEdge = false; break; } @@ -195,16 +184,15 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, if (BreakPHIEdge) return true; - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); - I != E; ++I) { + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { // Determine the block of the use. - MachineInstr *UseInst = &*I; + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = &MO - &UseInst->getOperand(0); MachineBasicBlock *UseBlock = UseInst->getParent(); if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. - UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); + UseBlock = UseInst->getOperand(OpNo+1).getMBB(); } else if (UseBlock == DefMBB) { LocalUse = true; return false; @@ -219,6 +207,9 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + DEBUG(dbgs() << "******** Machine Sinking ********\n"); const TargetMachine &TM = MF.getTarget(); @@ -460,12 +451,9 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, // Check if only use in post dominated block is PHI instruction. 
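The MachineSink hunks above drop use_nodbg_iterator (which carried getOperandNo()) in favour of range-based iteration over the use operands, so the operand index is recovered with pointer arithmetic: an instruction's operands are stored contiguously, which makes the difference of operand addresses the operand number. The tiny example below shows the same idiom on a plain struct; it is not the MachineInstr representation.

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct Operand { int Reg; };

    struct Instr {
      std::vector<Operand> Ops;                    // contiguous storage
      Operand &getOperand(unsigned i) { return Ops[i]; }
      unsigned getNumOperands() const { return Ops.size(); }
    };

    int main() {
      Instr MI;
      MI.Ops.push_back(Operand{10});
      MI.Ops.push_back(Operand{11});
      MI.Ops.push_back(Operand{12});

      for (Operand &MO : MI.Ops) {
        // Valid only because all operands of one instruction live in a
        // single contiguous array.
        unsigned OpNo = &MO - &MI.getOperand(0);
        assert(OpNo < MI.getNumOperands());
        std::printf("operand %u holds reg %d\n", OpNo, MO.Reg);
      }
    }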
bool NonPHIUse = false; - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); - I != E; ++I) { - MachineInstr *UseInst = &*I; - MachineBasicBlock *UseBlock = UseInst->getParent(); - if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) { + MachineBasicBlock *UseBlock = UseInst.getParent(); + if (UseBlock == SuccToSinkTo && !UseInst.isPHI()) NonPHIUse = true; } if (!NonPHIUse) @@ -553,7 +541,12 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // we should sink to. // We give successors with smaller loop depth higher priority. SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); - std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + // Sort Successors according to their loop depth. + std::stable_sort( + Succs.begin(), Succs.end(), + [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + }); for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 6aa3f67..d07178e 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -302,9 +302,9 @@ static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { // instructions. namespace { class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { - const char *getName() const { return "MinInstr"; } - const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); - const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + const char *getName() const override { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override; + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; public: MinInstrCountEnsemble(MachineTraceMetrics *mtm) @@ -627,7 +627,7 @@ struct DataDep { assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); assert(!DefI.atEnd() && "Register has no defs"); - DefMI = &*DefI; + DefMI = DefI->getParent(); DefOp = DefI.getOperandNo(); assert((++DefI).atEnd() && "Register has multiple defs"); } @@ -944,7 +944,7 @@ static bool pushDepHeight(const DataDep &Dep, // Update Heights[DefMI] to be the maximum height seen. 
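The hunk just below swaps LLVM's tie() helper for std::tie. The surrounding idiom is worth spelling out: map::insert returns an (iterator, inserted) pair, so a single call either creates the entry or hands back the existing one, which is then raised to the maximum height seen. A standalone sketch with an invented key and values:

    #include <algorithm>
    #include <cstdio>
    #include <map>
    #include <tuple>

    int main() {
      std::map<int, unsigned> Heights;     // key -> largest height seen so far

      int Key = 42;
      unsigned Vals[] = {3, 7, 5};
      for (unsigned v : Vals) {
        std::map<int, unsigned>::iterator I;
        bool New;
        // insert() only succeeds if Key is absent; otherwise I points at the
        // existing entry and New is false.
        std::tie(I, New) = Heights.insert(std::make_pair(Key, v));
        if (!New)
          I->second = std::max(I->second, v);   // keep the maximum height
      }
      std::printf("height = %u\n", Heights[Key]);   // prints 7
    }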
MIHeightMap::iterator I; bool New; - tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); if (New) return true; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d61470c..1bd75f7 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -246,12 +246,12 @@ namespace { initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { MF.verify(this, Banner); return false; } @@ -276,7 +276,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + sys::fs::F_Append | sys::fs::F_Text); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -1075,7 +1076,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Verify SSA form. if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + std::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); // Check LiveInts for a live segment, but only for virtual registers. diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 3982612..56cb673 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -37,9 +37,9 @@ namespace { initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -61,6 +61,9 @@ INITIALIZE_PASS(OptimizePHIs, "opt-phis", "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + if (skipOptnoneFunction(*Fn.getFunction())) + return false; + MRI = &Fn.getRegInfo(); TII = Fn.getTarget().getInstrInfo(); @@ -139,10 +142,8 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { if (PHIsInCycle.size() == 16) return false; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), - E = MRI->use_end(); I != E; ++I) { - MachineInstr *UseMI = &*I; - if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) { + if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle)) return false; } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index dcd9072..0e9df58 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -57,8 +57,8 @@ namespace { initializePHIEliminationPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &Fn); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &Fn) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; private: /// 
EliminatePHINodes - Eliminate phi nodes by inserting copy instructions @@ -186,7 +186,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). MachineBasicBlock::iterator LastPHIIt = - prior(MBB.SkipPHIsAndLabels(MBB.begin())); + std::prev(MBB.SkipPHIsAndLabels(MBB.begin())); while (MBB.front().isPHI()) LowerPHINode(MBB, LastPHIIt); @@ -198,9 +198,8 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, /// This includes registers with no defs. static bool isImplicitlyDefined(unsigned VirtReg, const MachineRegisterInfo *MRI) { - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg), - DE = MRI->def_end(); DI != DE; ++DI) - if (!DI->isImplicitDef()) + for (MachineInstr &DI : MRI->def_instructions(VirtReg)) + if (!DI.isImplicitDef()) return false; return true; } @@ -222,7 +221,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; - MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt); + MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt); // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -267,7 +266,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Update live variable information if there is any. if (LV) { - MachineInstr *PHICopy = prior(AfterPHIsIt); + MachineInstr *PHICopy = std::prev(AfterPHIsIt); if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -306,7 +305,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Update LiveIntervals for the new copy or implicit def. if (LIS) { - MachineInstr *NewInstr = prior(AfterPHIsIt); + MachineInstr *NewInstr = std::prev(AfterPHIsIt); SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); @@ -444,7 +443,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = prior(InsertPos); + KillInst = std::prev(InsertPos); } } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -504,7 +503,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, } } else { // We just inserted this copy. - KillInst = prior(InsertPos); + KillInst = std::prev(InsertPos); } } assert(KillInst->readsRegister(SrcReg) && @@ -607,7 +606,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (!ShouldSplit) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { - DEBUG(dbgs() << "Failed to split ciritcal edge.\n"); + DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } Changed = true; diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp index e1b56e9..99bbad1 100644 --- a/lib/CodeGen/PHIEliminationUtils.cpp +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -34,11 +34,9 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Discover any defs/uses in this basic block. 
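The PHIElimination hunks above, like most files later in this patch, replace LLVM's prior()/llvm::next() helpers with std::prev/std::next, which work on any bidirectional iterator, MachineBasicBlock iterators included. A quick reminder of their semantics on a std::list, the closest standard analogue of an instruction list:

    #include <cstdio>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> MBB = {10, 20, 30, 40};

      std::list<int>::iterator It = MBB.begin();
      std::advance(It, 2);                          // points at 30

      // std::next/std::prev return a copy; the original iterator is untouched.
      std::printf("%d %d %d\n", *std::prev(It), *It, *std::next(It));  // 20 30 40

      // std::prev(c.end()) is the last element, the idiom the patch uses for
      // "the instruction just inserted before this position".
      std::printf("last = %d\n", *std::prev(MBB.end()));               // last = 40
    }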
SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg), - RE = MRI.reg_end(); RI != RE; ++RI) { - MachineInstr* DefUseMI = &*RI; - if (DefUseMI->getParent() == MBB) - DefUsesInMBB.insert(DefUseMI); + for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) { + if (RI.getParent() == MBB) + DefUsesInMBB.insert(&RI); } MachineBasicBlock::iterator InsertPoint; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index f4ffd03..080b20d 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -14,11 +14,11 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -30,6 +30,11 @@ using namespace llvm; +namespace llvm { +extern cl::opt<bool> EnableStackMapLiveness; +extern cl::opt<bool> EnablePatchPointLiveness; +} + static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -56,7 +61,7 @@ static cl::opt<cl::boolOrDefault> OptimizeRegAlloc("optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); static cl::opt<cl::boolOrDefault> -EnableMachineSched("enable-misched", cl::Hidden, +EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, @@ -65,6 +70,8 @@ static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting", + cl::Hidden, cl::desc("Disable ConstantHoisting")); static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, cl::desc("Disable Codegen Prepare")); static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, @@ -83,6 +90,14 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +// Temporary option to allow experimenting with MachineScheduler as a post-RA +// scheduler. Targets can "properly" enable this with +// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't +// be part of the standard pass pipeline, and the target would just add a PostRA +// scheduling pass wherever it wants. +static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, + cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); + // Experimental option to run live interval analysis early. 
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); @@ -376,13 +391,17 @@ void TargetPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { addPass(createLoopStrengthReducePass()); if (PrintLSR) - addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } addPass(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); + + // Prepare expensive constants for SelectionDAG. + if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) + addPass(createConstantHoistingPass()); } /// Turn exception handling constructs into something the code generators can @@ -404,7 +423,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM)); + addPass(createLowerInvokePass()); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); @@ -422,14 +441,13 @@ void TargetPassConfig::addCodeGenPrepare() { /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(TM)); - addPreISel(); + addPass(createStackProtectorPass(TM)); + if (PrintISelInput) - addPass(createPrintFunctionPass("\n\n" - "*** Final LLVM Code input to ISel ***\n", - &dbgs())); + addPass(createPrintFunctionPass( + dbgs(), "\n\n*** Final LLVM Code input to ISel ***\n")); // All passes which modify the LLVM IR are now complete; run the verifier // to ensure that the IR is valid. @@ -520,7 +538,10 @@ void TargetPassConfig::addMachinePasses() { // Second pass scheduler. if (getOptLevel() != CodeGenOpt::None) { - addPass(&PostRASchedulerID); + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); printAndVerify("After PostRAScheduler"); } @@ -536,6 +557,9 @@ void TargetPassConfig::addMachinePasses() { if (addPreEmitPass()) printAndVerify("After PreEmit passes"); + + if (EnableStackMapLiveness || EnablePatchPointLiveness) + addPass(&StackMapLivenessID); } /// Add passes that optimize machine instructions in SSA form. @@ -725,7 +749,10 @@ void TargetPassConfig::addMachineLateOptimization() { printAndVerify("After BranchFolding"); // Tail duplication. - if (addPass(&TailDuplicateID)) + // Note that duplicating tail just increases code size and degrades + // performance for targets that require Structured Control Flow. + // In addition it can also make CFG irreducible. Thus we disable it. + if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID)) printAndVerify("After TailDuplicate"); // Copy propagation. 
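Passes.cpp above gains several hidden cl::opt flags (misched-postra, disable-constant-hoisting, and the externs for stack-map liveness), and the scheduler hunks earlier rely on getNumOccurrences() to tell an explicitly passed flag from one left at its default. Below is a minimal sketch of declaring and reading such a flag in a standalone tool; the flag name is made up, and the snippet assumes the usual llvm::cl API of this era.

    // Sketch only: assumes an LLVM build environment.
    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static cl::opt<bool>
    MySchedFlag("my-sched-flag", cl::Hidden, cl::init(false),
                cl::desc("Hypothetical flag overriding a scheduling policy"));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      // getNumOccurrences() > 0 means the flag appeared on the command line,
      // so it should override any target- or policy-derived default.
      if (MySchedFlag.getNumOccurrences() > 0)
        outs() << "explicit: " << (MySchedFlag ? "on" : "off") << "\n";
      else
        outs() << "using the default policy\n";
      return 0;
    }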
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 28f2d2f..e18d9635 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -110,9 +110,9 @@ namespace { initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); if (Aggressive) { @@ -133,7 +133,8 @@ namespace { bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); - bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); + bool isLoadFoldable(MachineInstr *MI, + SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); }; } @@ -187,10 +188,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - ReachedBBs.insert(UI->getParent()); + for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) + ReachedBBs.insert(UI.getParent()); // Uses that are in the same BB of uses of the result of the instruction. SmallVector<MachineOperand*, 8> Uses; @@ -199,11 +198,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); - UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { + MachineInstr *UseMI = UseMO.getParent(); if (UseMI == MI) continue; @@ -270,11 +266,9 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - if (UI->isPHI()) - PHIBBs.insert(UI->getParent()); + for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) + if (UI.isPHI()) + PHIBBs.insert(UI.getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); for (unsigned i = 0, e = Uses.size(); i != e; ++i) { @@ -496,8 +490,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { /// isLoadFoldable - Check whether MI is a candidate for folding into a later /// instruction. We only fold loads to virtual registers and the virtual /// register defined has a single use. 
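This file, like MachineSink above, converts explicit use_nodbg_iterator loops into range-based for over MRI->use_nodbg_operands(Reg) and use_nodbg_instructions(Reg). Range-based for only needs an object exposing begin()/end(); the adapter below shows the general shape of such a range in plain C++ and is not MachineRegisterInfo's implementation.

    #include <cstdio>
    #include <vector>

    // Minimal iterator_range-style adapter: just carries a begin/end pair.
    template <typename IterT>
    struct SimpleRange {
      IterT B, E;
      IterT begin() const { return B; }
      IterT end() const { return E; }
    };

    template <typename IterT>
    SimpleRange<IterT> make_range(IterT B, IterT E) {
      SimpleRange<IterT> R = {B, E};
      return R;
    }

    int main() {
      std::vector<int> UseRegs = {5, 7, 9};

      // Equivalent to: for (it = begin; it != end; ++it) use(*it);
      for (int Reg : make_range(UseRegs.begin(), UseRegs.end()))
        std::printf("use of reg %d\n", Reg);
    }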
-bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, - unsigned &FoldAsLoadDefReg) { +bool PeepholeOptimizer::isLoadFoldable( + MachineInstr *MI, + SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { if (!MI->canFoldAsLoad() || !MI->mayLoad()) return false; const MCInstrDesc &MCID = MI->getDesc(); @@ -505,13 +500,13 @@ bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, return false; unsigned Reg = MI->getOperand(0).getReg(); - // To reduce compilation time, we check MRI->hasOneUse when inserting + // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. if (!MI->getOperand(0).getSubReg() && TargetRegisterInfo::isVirtualRegister(Reg) && - MRI->hasOneUse(Reg)) { - FoldAsLoadDefReg = Reg; + MRI->hasOneNonDBGUse(Reg)) { + FoldAsLoadDefCandidates.insert(Reg); return true; } return false; @@ -561,6 +556,9 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, } bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); @@ -574,18 +572,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - SmallPtrSet<MachineInstr*, 8> LocalMIs; - SmallSet<unsigned, 4> ImmDefRegs; - DenseMap<unsigned, MachineInstr*> ImmDefMIs; - unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; - LocalMIs.clear(); - ImmDefRegs.clear(); - ImmDefMIs.clear(); - FoldAsLoadDefReg = 0; + SmallPtrSet<MachineInstr*, 8> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; + SmallSet<unsigned, 16> FoldAsLoadDefCandidates; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { @@ -594,16 +588,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { ++MII; LocalMIs.insert(MI); + // Skip debug values. They should not affect this peephole optimization. + if (MI->isDebugValue()) + continue; + // If there exists an instruction which belongs to the following - // categories, we will discard the load candidate. - if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || + // categories, we will discard the load candidates. + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || + MI->isKill() || MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { - FoldAsLoadDefReg = 0; + FoldAsLoadDefCandidates.clear(); continue; } if (MI->mayStore() || MI->isCall()) - FoldAsLoadDefReg = 0; + FoldAsLoadDefCandidates.clear(); if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || @@ -630,26 +628,43 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // Check whether MI is a load candidate for folding into a later // instruction. If MI is not a candidate, check whether we can fold an // earlier load into MI. - if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { - // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr - // can enable folding by converting SUB to CMP. 
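The hunk around here widens the single FoldAsLoadDefReg into a set of candidate registers that is cleared whenever an instruction could invalidate the pending loads (labels/positions, inline asm, stores, calls, unmodeled side effects). The toy walk below illustrates only that bookkeeping pattern of collecting candidates and dropping them at barriers; it is a simplification, not the pass's real folding logic.

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Inst {
      bool IsLoad;       // defines a foldable register
      bool IsBarrier;    // store/call: drop all pending candidates
      int  DefReg;       // register defined by a load, or -1
      int  UseReg;       // register this instruction would like to fold, or -1
    };

    int main() {
      std::set<int> FoldCandidates;
      std::vector<Inst> Block = {
        {true,  false,  1, -1},   // load into r1
        {true,  false,  2, -1},   // load into r2
        {false, true,  -1, -1},   // call: clears candidates
        {true,  false,  3, -1},   // load into r3
        {false, false, -1,  3},   // user of r3: fold succeeds
        {false, false, -1,  1},   // user of r1: candidate dropped at the call
      };

      for (const Inst &I : Block) {
        if (I.IsBarrier) { FoldCandidates.clear(); continue; }
        if (I.IsLoad)    { FoldCandidates.insert(I.DefReg); continue; }
        if (I.UseReg != -1 && FoldCandidates.count(I.UseReg)) {
          std::printf("fold load of r%d into its user\n", I.UseReg);
          FoldCandidates.erase(I.UseReg);   // each candidate folds at most once
        }
      }
    }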
- MachineInstr *DefMI = 0; - MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, - FoldAsLoadDefReg, DefMI); - if (FoldMI) { - // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. - DEBUG(dbgs() << "Replacing: " << *MI); - DEBUG(dbgs() << " With: " << *FoldMI); - LocalMIs.erase(MI); - LocalMIs.erase(DefMI); - LocalMIs.insert(FoldMI); - MI->eraseFromParent(); - DefMI->eraseFromParent(); - ++NumLoadFold; - - // MI is replaced with FoldMI. - Changed = true; - continue; + if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && + !FoldAsLoadDefCandidates.empty()) { + const MCInstrDesc &MIDesc = MI->getDesc(); + for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); + ++i) { + const MachineOperand &MOp = MI->getOperand(i); + if (!MOp.isReg()) + continue; + unsigned FoldAsLoadDefReg = MOp.getReg(); + if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { + // We need to fold load after optimizeCmpInstr, since + // optimizeCmpInstr can enable folding by converting SUB to CMP. + // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and + // we need it for markUsesInDebugValueAsUndef(). + unsigned FoldedReg = FoldAsLoadDefReg; + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, + DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted + // DefMI. + DEBUG(dbgs() << "Replacing: " << *MI); + DEBUG(dbgs() << " With: " << *FoldMI); + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + MRI->markUsesInDebugValueAsUndef(FoldedReg); + FoldAsLoadDefCandidates.erase(FoldedReg); + ++NumLoadFold; + // MI is replaced with FoldMI. + Changed = true; + break; + } + } } } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 1afc1ec..a13e51f 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -86,7 +85,7 @@ namespace { static char ID; PostRAScheduler() : MachineFunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addRequired<TargetPassConfig>(); @@ -97,7 +96,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; char PostRAScheduler::ID = 0; @@ -121,9 +120,6 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// LiveRegs - true if the register is live. - BitVector LiveRegs; - /// The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; @@ -145,23 +141,23 @@ namespace { /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void startBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB) override; // Set the index of RegionEnd within the current BB. void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } /// Initialize the scheduler state for the next scheduling region. 
- virtual void enterRegion(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned regioninstrs); + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) override; /// Notify that the scheduler has finished scheduling the current region. - virtual void exitRegion(); + void exitRegion() override; /// Schedule - Schedule the instruction range using list scheduling. /// - void schedule(); + void schedule() override; void EmitSchedule(); @@ -172,26 +168,16 @@ namespace { /// finishBlock - Clean up register live-range state. /// - void finishBlock(); - - /// FixupKills - Fix register kill flags that have been made - /// invalid due to scheduling - /// - void FixupKills(MachineBasicBlock *MBB); + void finishBlock() override; private: void ReleaseSucc(SUnit *SU, SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); - void StartBlockForKills(MachineBasicBlock *BB); - - // ToggleKillFlag - Toggle a register operand kill flag. Other - // adjustments may be made to the instruction if necessary. Return - // true if the operand has been deleted, false if not. - bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); void dumpSchedule() const; + void emitNoop(unsigned CurCycle); }; } @@ -205,9 +191,8 @@ SchedulePostRATDList::SchedulePostRATDList( AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), - LiveRegs(TRI->getNumRegs()), EndIndex(0) -{ + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); HazardRec = @@ -260,6 +245,9 @@ void SchedulePostRATDList::dumpSchedule() const { #endif bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { + if (skipOptnoneFunction(*Fn.getFunction())) + return false; + TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); @@ -320,7 +308,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB->end(); unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { - MachineInstr *MI = llvm::prior(I); + MachineInstr *MI = std::prev(I); --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we @@ -352,7 +340,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.FixupKills(MBB); + Scheduler.fixupKills(MBB); } return true; @@ -423,148 +411,6 @@ void SchedulePostRATDList::finishBlock() { ScheduleDAGInstrs::finishBlock(); } -/// StartBlockForKills - Initialize register live-range state for updating kills -/// -void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { - // Start with no live registers. - LiveRegs.reset(); - - // Examine the live-in regs of all successors. 
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - -bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, - MachineOperand &MO) { - // Setting kill flag... - if (!MO.isKill()) { - MO.setIsKill(true); - return false; - } - - // If MO itself is live, clear the kill flag... - if (LiveRegs.test(MO.getReg())) { - MO.setIsKill(false); - return false; - } - - // If any subreg of MO is live, then create an imp-def for that - // subreg and keep MO marked as killed. - MO.setIsKill(false); - bool AllDead = true; - const unsigned SuperReg = MO.getReg(); - MachineInstrBuilder MIB(MF, MI); - for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - MIB.addReg(*SubRegs, RegState::ImplicitDefine); - AllDead = false; - } - } - - if(AllDead) - MO.setIsKill(true); - return false; -} - -/// FixupKills - Fix the register kill flags, they may have been made -/// incorrect by instruction reordering. -/// -void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - - BitVector killedRegs(TRI->getNumRegs()); - - StartBlockForKills(MBB); - - // Examine block from end to start... - unsigned Count = MBB->size(); - for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); - I != E; --Count) { - MachineInstr *MI = --I; - if (MI->isDebugValue()) - continue; - - // Update liveness. Registers that are defed but not used in this - // instruction are now dead. Mark register and all subregs as they - // are completely defined. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - LiveRegs.clearBitsNotInMask(MO.getRegMask()); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - // Repeat for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.reset(*SubRegs); - } - - // Examine all used registers and set/clear kill flag. When a - // register is used multiple times we only set the kill flag on - // the first use. Don't set kill flags on undef operands. - killedRegs.reset(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - bool kill = false; - if (!killedRegs.test(Reg)) { - kill = true; - // A register is not killed if any subregs are live... - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - kill = false; - break; - } - } - - // If subreg is not live, then register is killed if it became - // live in this instruction - if (kill) - kill = !LiveRegs.test(Reg); - } - - if (MO.isKill() != kill) { - DEBUG(dbgs() << "Fixing " << MO << " in "); - // Warning: ToggleKillFlag may invalidate MO. 
- ToggleKillFlag(MI, MO); - DEBUG(MI->dump()); - } - - killedRegs.set(Reg); - } - - // Mark any used register (that is not using undef) and subregs as - // now live... - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -630,6 +476,14 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { AvailableQueue.scheduledNode(SU); } +/// emitNoop - Add a noop to the current instruction sequence. +void SchedulePostRATDList::emitNoop(unsigned CurCycle) { + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; +} + /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void SchedulePostRATDList::ListScheduleTopDown() { @@ -678,7 +532,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); - SUnit *FoundSUnit = 0; + SUnit *FoundSUnit = 0, *NotPreferredSUnit = 0; bool HasNoopHazards = false; while (!AvailableQueue.empty()) { SUnit *CurSUnit = AvailableQueue.pop(); @@ -686,8 +540,19 @@ void SchedulePostRATDList::ListScheduleTopDown() { ScheduleHazardRecognizer::HazardType HT = HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { - FoundSUnit = CurSUnit; - break; + if (HazardRec->ShouldPreferAnother(CurSUnit)) { + if (!NotPreferredSUnit) { + // If this is the first non-preferred node for this cycle, then + // record it and continue searching for a preferred node. If this + // is not the first non-preferred node, then treat it as though + // there had been a hazard. + NotPreferredSUnit = CurSUnit; + continue; + } + } else { + FoundSUnit = CurSUnit; + break; + } } // Remember if this is a noop hazard. @@ -696,6 +561,20 @@ void SchedulePostRATDList::ListScheduleTopDown() { NotReady.push_back(CurSUnit); } + // If we have a non-preferred node, push it back onto the available list. + // If we did not find a preferred node, then schedule this first + // non-preferred node. + if (NotPreferredSUnit) { + if (!FoundSUnit) { + DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n"); + FoundSUnit = NotPreferredSUnit; + } else { + AvailableQueue.push(NotPreferredSUnit); + } + + NotPreferredSUnit = 0; + } + // Add the nodes that aren't ready back onto the available list. if (!NotReady.empty()) { AvailableQueue.push_all(NotReady); @@ -704,6 +583,11 @@ void SchedulePostRATDList::ListScheduleTopDown() { // If we found a node to schedule... if (FoundSUnit) { + // If we need to emit noops prior to this instruction, then do so. + unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit); + for (unsigned i = 0; i != NumPreNoops; ++i) + emitNoop(CurCycle); + // ... schedule the node... 
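The ListScheduleTopDown() changes above make the post-RA list scheduler aware of ShouldPreferAnother(): the first hazard-free but non-preferred unit is remembered while the search continues, is scheduled only if no preferred hazard-free unit turns up, and is otherwise pushed back onto the available queue. A compact sketch of just that selection step, with toy types standing in for SUnit and the hazard recognizer:

    #include <cstdio>
    #include <vector>

    struct Unit { int Id; bool HasHazard; bool NotPreferred; };

    // Returns the chosen unit's Id (removing it from Avail), or -1 when
    // nothing can issue and the caller should emit a noop.
    static int pickAvailable(std::vector<Unit> &Avail) {
      int Found = -1, FirstNotPreferred = -1;
      for (int i = 0; i < (int)Avail.size(); ++i) {
        if (Avail[i].HasHazard)
          continue;                         // would stall: keep looking
        if (Avail[i].NotPreferred) {
          if (FirstNotPreferred < 0)
            FirstNotPreferred = i;          // remember only the first one
          continue;
        }
        Found = i;                          // preferred and hazard-free
        break;
      }
      if (Found < 0)
        Found = FirstNotPreferred;          // fall back to the non-preferred unit
      if (Found < 0)
        return -1;
      int Id = Avail[Found].Id;
      Avail.erase(Avail.begin() + Found);   // the rest stay available
      return Id;
    }

    int main() {
      std::vector<Unit> Avail = {
        {0, true,  false},                  // hazard
        {1, false, true},                   // hazard-free but not preferred
        {2, false, false},                  // preferred
      };
      std::printf("pick SU(%d)\n", pickAvailable(Avail));   // SU(2)
      std::printf("pick SU(%d)\n", pickAvailable(Avail));   // SU(1)
    }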
ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); @@ -728,10 +612,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; + emitNoop(CurCycle); } ++CurCycle; @@ -769,13 +650,13 @@ void SchedulePostRATDList::EmitSchedule() { // Update the Begin iterator, as the first instruction in the block // may have been scheduled later. if (i == 0) - RegionBegin = prior(RegionEnd); + RegionBegin = std::prev(RegionEnd); } // Reinsert any remaining debug_values. for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); MachineInstr *DbgValue = P.first; MachineBasicBlock::iterator OrigPrivMI = P.second; BB->splice(++OrigPrivMI, BB, DbgValue); diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 0c5173a..360e8d7 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -41,9 +41,9 @@ public: initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &au) const; + void getAnalysisUsage(AnalysisUsage &au) const override; - virtual bool runOnMachineFunction(MachineFunction &fn); + bool runOnMachineFunction(MachineFunction &fn) override; }; } // end anonymous namespace @@ -80,10 +80,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { // For virtual registers, mark all uses as <undef>, and convert users to // implicit-def when possible. 
- for (MachineRegisterInfo::use_nodbg_iterator UI = - MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &MO = UI.getOperand(); + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { MO.setIsUndef(); MachineInstr *UserMI = MO.getParent(); if (!canTurnIntoImplicitDef(UserMI)) diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index b0e494f..136b1ed 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -20,6 +20,7 @@ #include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" @@ -29,7 +30,10 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -54,6 +58,7 @@ INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(PEI, "prologepilog", "Prologue/Epilogue Insertion & Frame Finalization", @@ -67,6 +72,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -95,6 +101,9 @@ void PEI::calculateSets(MachineFunction &Fn) { return; } +/// StackObjSet - A set of stack object indexes +typedef SmallSetVector<int, 8> StackObjSet; + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -160,10 +169,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Warn on stack size when we exceeds the given limit. MachineFrameInfo *MFI = Fn.getFrameInfo(); - if (WarnStackSize.getNumOccurrences() > 0 && - WarnStackSize < MFI->getStackSize()) - errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize() - << ") in " << Fn.getName() << ".\n"; + uint64_t StackSize = MFI->getStackSize(); + if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { + DiagnosticInfoStackSize DiagStackSize(*F, StackSize); + F->getContext().diagnose(DiagStackSize); + } delete RS; ReturnBlocks.clear(); @@ -409,11 +419,28 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, } } +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +static void +AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + ProtectedObjs.insert(i); + } +} + /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. 
/// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + StackProtector *SP = &getAnalysis<StackProtector>(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -523,8 +550,12 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. - SmallSet<int, 16> LargeStackObjs; + SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); @@ -541,12 +572,29 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (!MFI->MayNeedStackProtector(i)) - continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); - LargeStackObjs.insert(i); + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); } // Then assign frame offsets to stack objects that are not used to spill @@ -563,7 +611,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (LargeStackObjs.count(i)) + if (ProtectedObjs.count(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); @@ -711,14 +759,14 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, SPAdj += Size; MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); + if (I != BB->begin()) PrevI = std::prev(I); TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else - I = llvm::next(PrevI); + I = std::next(PrevI); continue; } @@ -801,9 +849,9 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { I = BB->begin(); MachineInstr *MI = I; - MachineBasicBlock::iterator J = llvm::next(I); - MachineBasicBlock::iterator P = I == BB->begin() ? - MachineBasicBlock::iterator(NULL) : llvm::prior(I); + MachineBasicBlock::iterator J = std::next(I); + MachineBasicBlock::iterator P = + I == BB->begin() ? MachineBasicBlock::iterator(NULL) : std::prev(I); // RS should process this instruction before we might scavenge at this // location. This is because we might be replacing a virtual register @@ -846,7 +894,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // spill code will have been inserted in between I and J. This is a // problem because we need the spill code before I: Move I to just // prior to J. - if (I != llvm::prior(J)) { + if (I != std::prev(J)) { BB->splice(J, BB, I); // Before we move I, we need to prepare the RS to visit I again. 
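The stack-protector change above replaces the single "may need protector" set with three SSPLayoutKind buckets, assigned in the order of the AssignProtectedObjSet calls (large arrays, then small arrays, then address-taken objects), so the objects most likely to overflow end up nearest the guard slot. A small standalone model of that bucketed assignment, assuming a downward-growing stack and using plain vectors in place of MachineFrameInfo (all names here are illustrative):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

enum Kind { None, AddrOf, SmallArray, LargeArray };

struct Obj { int Idx; uint64_t Size; Kind K; };

// Assign offsets for every object of kind K, growing the frame downward.
static void assignBucket(const std::vector<Obj> &Objs, Kind K, int64_t &Offset,
                         std::vector<std::pair<int, int64_t> > &Layout) {
  for (const Obj &O : Objs)
    if (O.K == K) {
      Offset += (int64_t)O.Size;
      Layout.push_back({O.Idx, -Offset}); // frame index -> offset below the guard
    }
}

int main() {
  std::vector<Obj> Objs = {{0, 64, LargeArray}, {1, 8, AddrOf}, {2, 16, SmallArray}};
  int64_t Offset = 8; // pretend the protector slot itself was already placed
  std::vector<std::pair<int, int64_t> > Layout;
  // Same call order as the patch's AssignProtectedObjSet sequence.
  assignBucket(Objs, LargeArray, Offset, Layout);
  assignBucket(Objs, SmallArray, Offset, Layout);
  assignBucket(Objs, AddrOf, Offset, Layout);
  for (const auto &P : Layout)
    std::printf("fi#%d -> offset %lld\n", P.first, (long long)P.second);
}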
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 77cfa2b..5a6d39a 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -37,12 +37,12 @@ namespace llvm { initializePEIPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; private: RegScavenger *RS; diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 293e306..33584f8 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the RegAllocBase class which provides comon functionality +// This file defines the RegAllocBase class which provides common functionality // for LiveIntervalUnion-based register allocators. // //===----------------------------------------------------------------------===// @@ -101,8 +101,8 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting. DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); typedef SmallVector<unsigned, 4> VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); @@ -110,11 +110,16 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! // Probably caused by an inline asm. - MachineInstr *MI; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - (MI = I.skipInstruction());) - if (MI->isInlineAsm()) + MachineInstr *MI = 0; + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *TmpMI = &*(I++); + if (TmpMI->isInlineAsm()) { + MI = TmpMI; break; + } + } if (MI) MI->emitError("inline assembly requires more registers than available"); else diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index c17a8d9..68bd4b5 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -37,7 +37,6 @@ #ifndef LLVM_CODEGEN_REGALLOCBASE #define LLVM_CODEGEN_REGALLOCBASE -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 6768e45..b8c04fc 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -64,7 +64,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase MachineFunction *MF; // state - OwningPtr<Spiller> SpillerInstance; + std::unique_ptr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; @@ -76,24 +76,22 @@ public: RABasic(); /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "Basic Register Allocator"; } /// RABasic analysis usage. 
- virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual void releaseMemory(); + void releaseMemory() override; - virtual Spiller &spiller() { return *SpillerInstance; } + Spiller &spiller() override { return *SpillerInstance; } - virtual float getPriority(LiveInterval *LI) { return LI->weight; } - - virtual void enqueue(LiveInterval *LI) { + void enqueue(LiveInterval *LI) override { Queue.push(LI); } - virtual LiveInterval *dequeue() { + LiveInterval *dequeue() override { if (Queue.empty()) return 0; LiveInterval *LI = Queue.top(); @@ -101,11 +99,11 @@ public: return LI; } - virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl<unsigned> &SplitVRegs); + unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &SplitVRegs) override; /// Perform register allocation. - virtual bool runOnMachineFunction(MachineFunction &mf); + bool runOnMachineFunction(MachineFunction &mf) override; // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index e92dbd2..8dc44f5 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -144,23 +144,23 @@ namespace { // not be erased. bool isBulkSpilling; - enum LLVM_ENUM_INT_TYPE(unsigned) { + enum : unsigned { spillClean = 1, spillDirty = 100, spillImpossible = ~0u }; public: - virtual const char *getPassName() const { + const char *getPassName() const override { return "Fast Register Allocator"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } private: - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; void AllocateBasicBlock(); void handleThroughOperands(MachineInstr *MI, SmallVectorImpl<unsigned> &VirtDead); @@ -224,7 +224,7 @@ bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { // Check that the use/def chain has exactly one operand - MO. MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); - if (&I.getOperand() != &MO) + if (&*I != &MO) return false; return ++I == MRI->reg_nodbg_end(); } @@ -585,12 +585,12 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { - const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); + const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. 
if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); @@ -618,7 +618,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI->getOperand(OpNum); if (New) { LRI = allocVirtReg(MI, LRI, Hint); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index c08d955..6a623b8 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/Support/CommandLine.h" @@ -59,6 +60,23 @@ SplitSpillMode("split-spill-mode", cl::Hidden, clEnumValEnd), cl::init(SplitEditor::SM_Partition)); +static cl::opt<unsigned> +LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, + cl::desc("Last chance recoloring max depth"), + cl::init(5)); + +static cl::opt<unsigned> LastChanceRecoloringMaxInterference( + "lcr-max-interf", cl::Hidden, + cl::desc("Last chance recoloring maximum number of considered" + " interference at a time"), + cl::init(8)); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt<unsigned> +CSRFirstTimeCost("regalloc-csr-first-time-cost", + cl::desc("Cost for first time use of callee-saved register."), + cl::init(0), cl::Hidden); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -66,10 +84,19 @@ namespace { class RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { + // Convenient shortcuts. + typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; + typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; + typedef SmallSet<unsigned, 16> SmallVirtRegSet; // context MachineFunction *MF; + // Shortcuts to some useful interface. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + // analyses SlotIndexes *Indexes; MachineBlockFrequencyInfo *MBFI; @@ -80,8 +107,8 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - OwningPtr<Spiller> SpillerInstance; - std::priority_queue<std::pair<unsigned, unsigned> > Queue; + std::unique_ptr<Spiller> SpillerInstance; + PQueue Queue; unsigned NextCascade; // Live ranges pass through a number of stages as we try to allocate them. @@ -160,20 +187,23 @@ class RAGreedy : public MachineFunctionPass, unsigned BrokenHints; ///< Total number of broken hints. float MaxWeight; ///< Maximum spill weight evicted. - EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + EvictionCost(): BrokenHints(0), MaxWeight(0) {} bool isMax() const { return BrokenHints == ~0u; } + void setMax() { BrokenHints = ~0u; } + + void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } + bool operator<(const EvictionCost &O) const { - if (BrokenHints != O.BrokenHints) - return BrokenHints < O.BrokenHints; - return MaxWeight < O.MaxWeight; + return std::tie(BrokenHints, MaxWeight) < + std::tie(O.BrokenHints, O.MaxWeight); } }; // splitting state. 
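The rewritten EvictionCost::operator< above uses std::tie to get a lexicographic comparison (broken hints first, maximum spill weight only as a tie-breaker) instead of hand-written branching. A tiny self-contained illustration of the same idiom:

#include <cassert>
#include <tuple>

struct EvictionCost {
  unsigned BrokenHints;
  float MaxWeight;
  bool operator<(const EvictionCost &O) const {
    // Compare BrokenHints first; MaxWeight only breaks ties.
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(O.BrokenHints, O.MaxWeight);
  }
};

int main() {
  EvictionCost A{0, 5.0f}, B{1, 0.5f};
  assert(A < B);          // fewer broken hints wins regardless of weight
  EvictionCost C{1, 0.25f};
  assert(C < B);          // equal hints: lower max weight is cheaper
}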
- OwningPtr<SplitAnalysis> SA; - OwningPtr<SplitEditor> SE; + std::unique_ptr<SplitAnalysis> SA; + std::unique_ptr<SplitEditor> SE; /// Cached per-block interference maps InterferenceCache IntfCache; @@ -217,12 +247,12 @@ class RAGreedy : public MachineFunctionPass, } }; - /// Candidate info for for each PhysReg in AllocationOrder. + /// Candidate info for each PhysReg in AllocationOrder. /// This vector never shrinks, but grows to the size of the largest register /// class. SmallVector<GlobalSplitCandidate, 32> GlobalCand; - enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; + enum : unsigned { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. @@ -232,28 +262,32 @@ public: RAGreedy(); /// Return the pass name. - virtual const char* getPassName() const { + const char* getPassName() const override { return "Greedy Register Allocator"; } /// RAGreedy analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual Spiller &spiller() { return *SpillerInstance; } - virtual void enqueue(LiveInterval *LI); - virtual LiveInterval *dequeue(); - virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl<unsigned>&); + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueue(LiveInterval *LI) override; + LiveInterval *dequeue() override; + unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; /// Perform register allocation. - virtual bool runOnMachineFunction(MachineFunction &mf); + bool runOnMachineFunction(MachineFunction &mf) override; static char ID; private: - bool LRE_CanEraseVirtReg(unsigned); - void LRE_WillShrinkVirtReg(unsigned); - void LRE_DidCloneVirtReg(unsigned, unsigned); + unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned = 0); + + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + void LRE_DidCloneVirtReg(unsigned, unsigned) override; + void enqueue(PQueue &CurQueue, LiveInterval *LI); + LiveInterval *dequeue(PQueue &CurQueue); BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); @@ -268,6 +302,9 @@ private: bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl<unsigned>&); + bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); @@ -275,6 +312,20 @@ private: SmallVectorImpl<unsigned>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); + /// Calculate cost of region splitting. + unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl<unsigned> &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. 
+ unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, + unsigned PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl<unsigned> &NewVRegs); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, @@ -283,6 +334,11 @@ private: SmallVectorImpl<unsigned>&); unsigned trySplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<unsigned>&); + unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned); + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned); }; } // end anonymous namespace @@ -301,7 +357,7 @@ const char *const RAGreedy::StageName[] = { // Hysteresis to use when comparing floats. // This helps stabilize decisions based on float comparisons. -const float Hysteresis = 0.98f; +const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 FunctionPass* llvm::createGreedyRegisterAllocator() { @@ -396,7 +452,9 @@ void RAGreedy::releaseMemory() { GlobalCand.clear(); } -void RAGreedy::enqueue(LiveInterval *LI) { +void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } + +void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); @@ -414,12 +472,25 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + // Giant live ranges fall back to the global assignment heuristic, which + // prevents excessive spilling in pathological cases. + bool ReverseLocal = TRI->reverseLocalAssignment(); + bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && + (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + + if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of // global interference and other constraints. - Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + if (!ReverseLocal) + Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + else { + // Allocating bottom up may allow many short LRGs to be assigned first + // to one of the cheap registers. This could be much faster for very + // large blocks on targets with many physical registers. + Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + } } else { // Allocate global and split ranges in long->short order. Long ranges that @@ -436,14 +507,16 @@ void RAGreedy::enqueue(LiveInterval *LI) { } // The virtual register number is a tie breaker for same-sized ranges. // Give lower vreg numbers higher priority to assign them first. 
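The push just below stores (priority, ~vreg) pairs in a max-heap, so for equal priorities the bitwise complement makes the numerically smaller virtual register compare larger and pop first, exactly as the tie-breaker comment says. A standalone sketch of that trick, with made-up register numbers:

#include <cstdio>
#include <queue>
#include <utility>

int main() {
  // Max-heap of (priority, ~reg): for equal priorities, the smaller reg
  // number has the larger complement and is dequeued first.
  std::priority_queue<std::pair<unsigned, unsigned> > Q;
  Q.push({100, ~7u});
  Q.push({100, ~3u});
  Q.push({200, ~9u});
  while (!Q.empty()) {
    std::printf("prio=%u reg=%u\n", Q.top().first, ~Q.top().second);
    Q.pop();
  }
  // Prints reg 9 (higher priority), then reg 3 before reg 7 (tie broken downward).
}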
- Queue.push(std::make_pair(Prio, ~Reg)); + CurQueue.push(std::make_pair(Prio, ~Reg)); } -LiveInterval *RAGreedy::dequeue() { - if (Queue.empty()) +LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } + +LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { + if (CurQueue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(~Queue.top().second); - Queue.pop(); + LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); + CurQueue.pop(); return LI; } @@ -471,7 +544,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) if (Order.isHint(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); - EvictionCost MaxCost(1); + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { evictInterference(VirtReg, Hint, NewVRegs); return Hint; @@ -543,11 +617,15 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, if (CanSplit && IsHint && !BreaksHint) return true; - return A.weight > B.weight; + if (A.weight > B.weight) { + DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + return true; + } + return false; } /// canEvictInterference - Return true if all interferences between VirtReg and -/// PhysReg can be evicted. When OnlyCheap is set, don't do anything +/// PhysReg can be evicted. /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. @@ -618,6 +696,9 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, return false; if (Urgent) continue; + // Apply the eviction policy for non-urgent evictions. + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + return false; // If !MaxCost.isMax(), then we're just looking for a cheap register. // Evicting another local live range in this case could lead to suboptimal // coloring. @@ -625,9 +706,6 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, !canReassign(*Intf, PhysReg)) { return false; } - // Finally, apply the eviction policy for non-urgent evictions. - if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) - return false; } } MaxCost = Cost; @@ -685,7 +763,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); // Keep track of the cheapest interference seen so far. - EvictionCost BestCost(~0u); + EvictionCost BestCost; + BestCost.setMax(); unsigned BestPhys = 0; unsigned OrderLimit = Order.getOrder().size(); @@ -713,7 +792,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, } Order.rewind(); - while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { + while (unsigned PhysReg = Order.next(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1172,9 +1251,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { unsigned NumCands = 0; - unsigned BestCand = NoCand; BlockFrequency BestCost; - SmallVector<unsigned, 8> UsedCands; // Check if we can split this live range around a compact region. bool HasCompact = calcCompactRegion(GlobalCand.front()); @@ -1186,11 +1263,33 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // No benefit from the compact region, our fallback will be per-block // splitting. 
Make sure we find a solution that is cheaper than spilling. BestCost = calcSpillCost(); - DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + DEBUG(dbgs() << "Cost of isolating all blocks = "; + MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + unsigned BestCand = + calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, + false/*IgnoreCSR*/); + + // No solutions found, fall back to single block splitting. + if (!HasCompact && BestCand == NoCand) + return 0; + + return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); +} + +unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + bool IgnoreCSR) { + unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) + continue; + // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). if (NumCands == IntfCache.getMaxCursors()) { @@ -1220,7 +1319,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; + MBFI->printBlockFreq(dbgs(), Cost)); if (Cost >= BestCost) { DEBUG({ if (BestCand == NoCand) @@ -1243,7 +1343,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cost += calcGlobalSplitCost(Cand); DEBUG({ - dbgs() << ", total = " << Cost << " with bundles"; + dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) + << " with bundles"; for (int i = Cand.LiveBundles.find_first(); i>=0; i = Cand.LiveBundles.find_next(i)) dbgs() << " EB#" << i; @@ -1255,11 +1356,13 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } ++NumCands; } + return BestCand; +} - // No solutions found, fall back to single block splitting. - if (!HasCompact && BestCand == NoCand) - return 0; - +unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl<unsigned> &NewVRegs) { + SmallVector<unsigned, 8> UsedCands; // Prepare split editor. LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); @@ -1348,6 +1451,22 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Per-Instruction Splitting //===----------------------------------------------------------------------===// +/// Get the number of allocatable registers that match the constraints of \p Reg +/// on \p MI and that are also in \p SuperRC. +static unsigned getNumAllocatableRegsForConstraints( + const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, + const RegisterClassInfo &RCI) { + assert(SuperRC && "Invalid register class"); + + const TargetRegisterClass *ConstrainedRC = + MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, + /* ExploreBundle */ true); + if (!ConstrainedRC) + return 0; + return RCI.getNumAllocatableRegs(ConstrainedRC); +} + /// tryInstructionSplit - Split a live range around individual instructions. /// This is normally not worthwhile since the spiller is doing essentially the /// same thing. 
However, when the live range is in a constrained register @@ -1358,8 +1477,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); // There is no point to this if there are no larger sub-classes. - if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + if (!RegClassInfo.isProperSubClass(CurRC)) return 0; // Always enable split spill mode, since we're effectively spilling to a @@ -1373,10 +1493,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - // Split around every non-copy instruction. + const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + // Split around every non-copy instruction if this split will relax + // the constraints on the virtual register. + // Otherwise, splitting just inserts uncoalescable copies that do not help + // the allocation. for (unsigned i = 0; i != Uses.size(); ++i) { if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) - if (MI->isFullCopy()) { + if (MI->isFullCopy() || + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, + TRI, RCI)) { DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); continue; } @@ -1571,7 +1699,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * - (1.0f / BlockFrequency::getEntryFrequency()); + (1.0f / MBFI->getEntryFreq()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1759,6 +1887,220 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, return tryBlockSplit(VirtReg, Order, NewVRegs); } +//===----------------------------------------------------------------------===// +// Last Chance Recoloring +//===----------------------------------------------------------------------===// + +/// mayRecolorAllInterferences - Check if the virtual registers that +/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be +/// recolored to free \p PhysReg. +/// When true is returned, \p RecoloringCandidates has been augmented with all +/// the live intervals that need to be recolored in order to free \p PhysReg +/// for \p VirtReg. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +bool +RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // If there is LastChanceRecoloringMaxInterference or more interferences, + // chances are one would not be recolorable. 
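The check that follows this comment keeps last-chance recoloring cheap: interfering virtual registers are only collected up to LastChanceRecoloringMaxInterference, and hitting that cap is treated as "too entangled to recolor". A minimal standalone sketch of a capped scan of that shape, with a plain vector standing in for the LiveIntervalUnion query (names invented for illustration):

#include <cstdio>
#include <vector>

// Collect at most Cap interfering ids; a full result means the caller
// should give up on this physical register early.
static bool collectUpTo(const std::vector<unsigned> &Interfering, unsigned Cap,
                        std::vector<unsigned> &Out) {
  for (unsigned Id : Interfering) {
    if (Out.size() >= Cap)
      return false;            // early abort: too many interferences
    Out.push_back(Id);
  }
  return true;
}

int main() {
  std::vector<unsigned> Interfering = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  std::vector<unsigned> Candidates;
  if (!collectUpTo(Interfering, /*Cap=*/8, Candidates))
    std::printf("Early abort: too many interferences.\n");
}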
+ if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= + LastChanceRecoloringMaxInterference) { + DEBUG(dbgs() << "Early abort: too many interferences.\n"); + return false; + } + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + // If Intf is done and sit on the same register class as VirtReg, + // it would not be recolorable as it is in the same state as VirtReg. + if ((getStage(*Intf) == RS_Done && + MRI->getRegClass(Intf->reg) == CurRC) || + FixedRegisters.count(Intf->reg)) { + DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); + return false; + } + RecoloringCandidates.insert(Intf); + } + } + return true; +} + +/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring +/// its interferences. +/// Last chance recoloring chooses a color for \p VirtReg and recolors every +/// virtual register that was using it. The recoloring process may recursively +/// use the last chance recoloring. Therefore, when a virtual register has been +/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot +/// be last-chance-recolored again during this recoloring "session". +/// E.g., +/// Let +/// vA can use {R1, R2 } +/// vB can use { R2, R3} +/// vC can use {R1 } +/// Where vA, vB, and vC cannot be split anymore (they are reloads for +/// instance) and they all interfere. +/// +/// vA is assigned R1 +/// vB is assigned R2 +/// vC tries to evict vA but vA is already done. +/// Regular register allocation fails. +/// +/// Last chance recoloring kicks in: +/// vC does as if vA was evicted => vC uses R1. +/// vC is marked as fixed. +/// vA needs to find a color. +/// None are available. +/// vA cannot evict vC: vC is a fixed virtual register now. +/// vA does as if vB was evicted => vA uses R2. +/// vB needs to find a color. +/// R3 is available. +/// Recoloring => vC = R1, vA = R2, vB = R3 +/// +/// \p Order defines the preferred allocation order for \p VirtReg. +/// \p NewRegs will contain any new virtual register that have been created +/// (split, spill) during the process and that must be assigned. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +/// \p Depth gives the current depth of the last chance recoloring. +/// \return a physical register that can be used for VirtReg or ~0u if none +/// exists. +unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + // Ranges must be Done. + assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && + "Last chance recoloring should really be last chance"); + // Set the max depth to LastChanceRecoloringMaxDepth. + // We may want to reconsider that if we end up with a too large search space + // for target with hundreds of registers. + // Indeed, in that case we may want to cut the search space earlier. + if (Depth >= LastChanceRecoloringMaxDepth) { + DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + return ~0u; + } + + // Set of Live intervals that will need to be recolored. + SmallLISet RecoloringCandidates; + // Record the original mapping virtual register to physical register in case + // the recoloring fails. 
+ DenseMap<unsigned, unsigned> VirtRegToPhysReg; + // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in + // this recoloring "session". + FixedRegisters.insert(VirtReg.reg); + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + RecoloringCandidates.clear(); + VirtRegToPhysReg.clear(); + + // It is only possible to recolor virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > + LiveRegMatrix::IK_VirtReg) { + DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); + + continue; + } + + // Early give up on this PhysReg if it is obvious we cannot recolor all + // the interferences. + if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, + FixedRegisters)) { + DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); + continue; + } + + // RecoloringCandidates contains all the virtual registers that interfer + // with VirtReg on PhysReg (or one of its aliases). + // Enqueue them for recoloring and perform the actual recoloring. + PQueue RecoloringQueue; + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + enqueue(RecoloringQueue, *It); + assert(VRM->hasPhys(ItVirtReg) && + "Interferences are supposed to be with allocated vairables"); + + // Record the current allocation. + VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); + // unset the related struct. + Matrix->unassign(**It); + } + + // Do as if VirtReg was assigned to PhysReg so that the underlying + // recoloring has the right information about the interferes and + // available colors. + Matrix->assign(VirtReg, PhysReg); + + // Save the current recoloring state. + // If we cannot recolor all the interferences, we will have to start again + // at this point for the next physical register. + SmallVirtRegSet SaveFixedRegisters(FixedRegisters); + if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, + Depth)) { + // Do not mess up with the global assignment process. + // I.e., VirtReg must be unassigned. + Matrix->unassign(VirtReg); + return PhysReg; + } + + DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + + // The recoloring attempt failed, undo the changes. + FixedRegisters = SaveFixedRegisters; + Matrix->unassign(VirtReg); + + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + if (VRM->hasPhys(ItVirtReg)) + Matrix->unassign(**It); + Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); + } + } + + // Last chance recoloring did not worked either, give up. + return ~0u; +} + +/// tryRecoloringCandidates - Try to assign a new color to every register +/// in \RecoloringQueue. +/// \p NewRegs will contain any new virtual register created during the +/// recoloring process. +/// \p FixedRegisters[in/out] contains all the registers that have been +/// recolored. +/// \return true if all virtual registers in RecoloringQueue were successfully +/// recolored, false otherwise. 
+bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + while (!RecoloringQueue.empty()) { + LiveInterval *LI = dequeue(RecoloringQueue); + DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); + unsigned PhysReg; + PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + if (PhysReg == ~0u || !PhysReg) + return false; + DEBUG(dbgs() << "Recoloring of " << *LI + << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + Matrix->assign(*LI, PhysReg); + FixedRegisters.insert(LI->reg); + } + return true; +} //===----------------------------------------------------------------------===// // Main Entry Point @@ -1766,10 +2108,84 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<unsigned> &NewVRegs) { + SmallVirtRegSet FixedRegisters; + return selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); +} + +/// Using a CSR for the first time has a cost because it causes push|pop +/// to be added to prologue|epilogue. Splitting a cold section of the live +/// range can have lower cost than using the CSR for the first time; +/// Spilling a live range in the cold path can have lower cost than using +/// the CSR for the first time. Returns the physical register if we decide +/// to use the CSR; otherwise return 0. +unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, + unsigned PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl<unsigned> &NewVRegs) { + // We use the larger one out of the command-line option and the value report + // by TRI. + BlockFrequency CSRCost(std::max((unsigned)CSRFirstTimeCost, + TRI->getCSRFirstUseCost())); + if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { + // We choose spill over using the CSR for the first time if the spill cost + // is lower than CSRCost. + SA->analyze(&VirtReg); + if (calcSpillCost() >= CSRCost) + return PhysReg; + + // We are going to spill, set CostPerUseLimit to 1 to make sure that + // we will not use a callee-saved register in tryEvict. + CostPerUseLimit = 1; + return 0; + } + if (getStage(VirtReg) < RS_Split) { + // We choose pre-splitting over using the CSR for the first time if + // the cost of splitting is lower than CSRCost. + SA->analyze(&VirtReg); + unsigned NumCands = 0; + unsigned BestCand = + calculateRegionSplitCost(VirtReg, Order, CSRCost, NumCands, + true/*IgnoreCSR*/); + if (BestCand == NoCand) + // Use the CSR if we can't find a region split below CSRCost. + return PhysReg; + + // Perform the actual pre-splitting. + doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); + return 0; + } + return PhysReg; +} + +unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + unsigned CostPerUseLimit = ~0u; // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); - if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) - return PhysReg; + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // We check other options if we are using a CSR for the first time. 
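tryAssignCSRFirstTime above captures the trade-off that motivates the new CSRFirstTimeCost knob: the first use of a callee-saved register adds a push/pop pair to the prologue/epilogue, so spilling or pre-splitting the live range is preferred whenever its estimated cost stays below CSRCost. A condensed standalone model of that decision with invented cost inputs (this is a sketch of the policy, not the allocator's actual cost computation):

#include <cstdio>

struct Choice { const char *What; unsigned PhysReg; };

// All costs are assumed to be in the same block-frequency-like units.
static Choice decideCSRFirstUse(unsigned CSRReg, double CSRCost,
                                bool Spillable, double SpillCost,
                                bool CanSplit, double BestSplitCost) {
  if (Spillable && SpillCost < CSRCost)
    return {"spill instead of touching the CSR", 0};
  if (CanSplit && BestSplitCost < CSRCost)
    return {"pre-split instead of touching the CSR", 0};
  return {"accept the CSR first-use cost", CSRReg};
}

int main() {
  Choice C = decideCSRFirstUse(/*CSRReg=*/42, /*CSRCost=*/10.0,
                               /*Spillable=*/true, /*SpillCost=*/3.0,
                               /*CanSplit=*/true, /*BestSplitCost=*/7.0);
  std::printf("%s (reg %u)\n", C.What, C.PhysReg);
}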
+ bool CSRFirstUse = false; + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) + CSRFirstUse = true; + + // When NewVRegs is not empty, we may have made decisions such as evicting + // a virtual register, go with the earlier decisions and use the physical + // register. + if ((CSRFirstTimeCost || TRI->getCSRFirstUseCost()) && + CSRFirstUse && NewVRegs.empty()) { + unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); + if (CSRReg || !NewVRegs.empty()) + // Return now if we decide to use a CSR or create new vregs due to + // pre-splitting. + return CSRReg; + } else + return PhysReg; + } LiveRangeStage Stage = getStage(VirtReg); DEBUG(dbgs() << StageName[Stage] @@ -1779,7 +2195,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // queue. The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) return PhysReg; assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); @@ -1797,7 +2213,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. if (Stage >= RS_Done || !VirtReg.isSpillable()) - return ~0u; + return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, + Depth); // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); @@ -1823,6 +2240,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; + TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getTarget().getInstrInfo(); + RCI.runOnMachineFunction(mf); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 88c8201..96dbd9a 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -34,7 +34,6 @@ #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -45,9 +44,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PBQP/Graph.h" -#include "llvm/CodeGen/PBQP/HeuristicSolver.h" -#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Module.h" @@ -91,8 +87,8 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0) - : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { + RegAllocPBQP(std::unique_ptr<PBQPBuilder> &b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); @@ -100,15 +96,15 @@ public: } /// Return the pass name. 
- virtual const char* getPassName() const { + const char* getPassName() const override { return "PBQP Register Allocator"; } /// PBQP analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &au) const; + void getAnalysisUsage(AnalysisUsage &au) const override; /// Perform register allocation - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -120,8 +116,7 @@ private: typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; typedef std::set<unsigned> RegSet; - - OwningPtr<PBQPBuilder> builder; + std::unique_ptr<PBQPBuilder> builder; char *customPassID; @@ -132,7 +127,7 @@ private: MachineRegisterInfo *mri; const MachineBlockFrequencyInfo *mbfi; - OwningPtr<Spiller> spiller; + std::unique_ptr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -157,13 +152,13 @@ char RegAllocPBQP::ID = 0; } // End anonymous namespace. -unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const { +unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const { Node2VReg::const_iterator vregItr = node2VReg.find(node); assert(vregItr != node2VReg.end() && "No vreg for node."); return vregItr->second; } -PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { +PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; @@ -194,8 +189,8 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - OwningPtr<PBQPRAProblem> p(new PBQPRAProblem()); - PBQP::Graph &g = p->getGraph(); + std::unique_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + PBQPRAGraph &g = p->getGraph(); RegSet pregs; // Collect the set of preg intervals, record that they're used in the MF. @@ -245,17 +240,19 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, vrAllowed.push_back(preg); } - // Construct the node. - PBQP::Graph::NodeId node = - g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); - - // Record the mapping and allowed set in the problem. - p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end()); + PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0); PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min(); - addSpillCosts(g.getNodeCosts(node), spillCost); + addSpillCosts(nodeCosts, spillCost); + + // Construct the node. + PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts)); + + // Record the mapping and allowed set in the problem. 
+ p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end()); + } for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); @@ -264,24 +261,24 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, const LiveInterval &l1 = lis->getInterval(vr1); const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); - for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr); - vr2Itr != vrEnd; ++vr2Itr) { + for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd; + ++vr2Itr) { unsigned vr2 = *vr2Itr; const LiveInterval &l2 = lis->getInterval(vr2); const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); assert(!l2.empty() && "Empty interval in vreg set?"); if (l1.overlaps(l2)) { - PBQP::Graph::EdgeId edge = - g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), - PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); + PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0); + addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri); - addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri); + g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), + std::move(edgeCosts)); } } } - return p.take(); + return p.release(); } void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, @@ -315,8 +312,8 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { - OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); - PBQP::Graph &g = p->getGraph(); + std::unique_ptr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs)); + PBQPRAGraph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); CoalescerPair cp(*tm.getRegisterInfo()); @@ -348,8 +345,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, // value plucked randomly out of the air. PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, - mbfi->getBlockFreq(mbb)); + copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, mi); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { @@ -363,33 +359,37 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, } if (pregOpt < allowed.size()) { ++pregOpt; // +1 to account for spill option. 
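Throughout this builder, option 0 of a node's cost vector is the spill option and options 1..N map onto the allowed physical registers, which is why indices such as pregOpt are bumped by one just above. A small standalone sketch of building such a cost vector, with std::vector standing in for PBQP::Vector (layout as described in the surrounding code; names illustrative):

#include <cstdio>
#include <vector>

// Per-option costs for one virtual register: option 0 = spill,
// options 1..N = the N allowed physical registers (initially free).
static std::vector<double> buildNodeCosts(double spillCost,
                                          unsigned numAllowedPRegs) {
  std::vector<double> costs(numAllowedPRegs + 1, 0.0);
  costs[0] = spillCost; // spilling is always option 0
  return costs;
}

int main() {
  std::vector<double> costs = buildNodeCosts(/*spillCost=*/2.5, /*numAllowedPRegs=*/3);
  for (unsigned opt = 0; opt < costs.size(); ++opt)
    std::printf("option %u -> cost %.2f (%s)\n", opt, costs[opt],
                opt == 0 ? "spill" : "preg");
}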
- PBQP::Graph::NodeId node = p->getNodeForVReg(src); - addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); + PBQPRAGraph::NodeId node = p->getNodeForVReg(src); + llvm::dbgs() << "Reading node costs for node " << node << "\n"; + llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n"; + PBQP::Vector newCosts(g.getNodeCosts(node)); + addPhysRegCoalesce(newCosts, pregOpt, cBenefit); + g.setNodeCosts(node, newCosts); } } else { const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst); - PBQP::Graph::NodeId node2 = p->getNodeForVReg(src); - PBQP::Graph::EdgeId edge = g.findEdge(node1, node2); + PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst); + PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src); + PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2); if (edge == g.invalidEdgeId()) { - edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, - allowed2->size() + 1, - 0)); + PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0); + addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); + g.addEdge(node1, node2, costs); } else { - if (g.getEdgeNode1(edge) == node2) { + if (g.getEdgeNode1Id(edge) == node2) { std::swap(node1, node2); std::swap(allowed1, allowed2); } + PBQP::Matrix costs(g.getEdgeCosts(edge)); + addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); + g.setEdgeCosts(edge, costs); } - - addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, - cBenefit); } } } - return p.take(); + return p.release(); } void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, @@ -472,14 +472,12 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, // Clear the existing allocation. vrm->clearAllVirt(); - const PBQP::Graph &g = problem.getGraph(); + const PBQPRAGraph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. - for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(), - nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - unsigned vreg = problem.getVRegForNode(*nodeItr); - unsigned alloc = solution.getSelection(*nodeItr); + for (auto NId : g.nodeIds()) { + unsigned vreg = problem.getVRegForNode(NId); + unsigned alloc = solution.getSelection(NId); if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); @@ -587,8 +585,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - OwningPtr<PBQPRAProblem> problem( - builder->build(mf, lis, mbfi, vregsToAlloc)); + std::unique_ptr<PBQPRAProblem> problem( + builder->build(mf, lis, mbfi, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { @@ -596,7 +594,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rs << round; std::string graphFileName(fqn + "." 
+ rs.str() + ".pbqpgraph"); std::string tmp; - raw_fd_ostream os(graphFileName.c_str(), tmp); + raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text); DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" << graphFileName << "\"\n"); problem->getGraph().dump(os); @@ -604,8 +602,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { #endif PBQP::Solution solution = - PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( - problem->getGraph()); + PBQP::RegAlloc::solve(problem->getGraph()); pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); @@ -623,14 +620,14 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -FunctionPass* llvm::createPBQPRegisterAllocator( - OwningPtr<PBQPBuilder> &builder, - char *customPassID) { +FunctionPass * +llvm::createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> &builder, + char *customPassID) { return new RegAllocPBQP(builder, customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - OwningPtr<PBQPBuilder> Builder; + std::unique_ptr<PBQPBuilder> Builder; if (pbqpCoalescing) Builder.reset(new PBQPBuilderWithCoalescing()); else diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index cacd7de..aa84446 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // // This file implements the RegisterClassInfo class which provides dynamic -// information about target register classes. Callee saved and reserved -// registers depends on calling conventions and other dynamic information, so -// some things cannot be determined statically. +// information about target register classes. Callee-saved vs. caller-saved and +// reserved registers depend on calling conventions and other dynamic +// information, so some things cannot be determined statically. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index dd86c1f..682c26c 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -112,7 +111,7 @@ namespace { void eliminateDeadDefs(); /// LiveRangeEdit callback. - void LRE_WillEraseInstruction(MachineInstr *MI); + void LRE_WillEraseInstruction(MachineInstr *MI) override; /// coalesceLocals - coalesce the LocalWorkList. void coalesceLocals(); @@ -188,15 +187,15 @@ namespace { initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual void releaseMemory(); + void releaseMemory() override; /// runOnMachineFunction - pass entry point - virtual bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; /// print - Implement the dump method. 
- virtual void print(raw_ostream &O, const Module* = 0) const; + void print(raw_ostream &O, const Module* = 0) const override; }; } /// end anonymous namespace @@ -283,7 +282,6 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { if (SrcSub) { Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); if (!Dst) return false; - SrcSub = 0; } else if (!MRI.getRegClass(Src)->contains(Dst)) { return false; } @@ -622,16 +620,15 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_nodbg_iterator UI = - MRI->use_nodbg_begin(IntA.reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; + for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { + MachineInstr *UseMI = MO.getParent(); + unsigned OpNo = &MO - &UseMI->getOperand(0); SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); if (US == IntA.end() || US->valno != AValNo) continue; // If this use is tied to a def, we can't rewrite the register. - if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) + if (UseMI->isRegTiedToDefOperand(OpNo)) return false; } @@ -669,8 +666,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Update uses of IntA of the specific Val# with IntB. for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), UE = MRI->use_end(); UI != UE;) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); ++UI; if (UseMI->isDebugValue()) { // FIXME These don't have an instruction index. Not clear we have enough @@ -769,6 +766,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; + // If both SrcIdx and DstIdx are set, correct rematerialization would widen + // the register substantially (beyond both source and dest size). This is bad + // for performance since it can cascade through a function, introducing many + // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers + // around after a few subreg copies). 
+ if (SrcIdx && DstIdx) + return false; + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { @@ -793,9 +798,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = - llvm::next(MachineBasicBlock::iterator(CopyMI)); + std::next(MachineBasicBlock::iterator(CopyMI)); TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); - MachineInstr *NewMI = prior(MII); + MachineInstr *NewMI = std::prev(MII); LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); @@ -816,31 +821,19 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, } if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + const TargetRegisterClass *NewRC = CP.getNewRC(); unsigned NewIdx = NewMI->getOperand(0).getSubReg(); - const TargetRegisterClass *RCForInst; + if (NewIdx) - RCForInst = TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), DefRC, - NewIdx); - - if (MRI->constrainRegClass(DstReg, DefRC)) { - // The materialized instruction is quite capable of setting DstReg - // directly, but it may still have a now-trivial subregister index which - // we should clear. - NewMI->getOperand(0).setSubReg(0); - } else if (NewIdx && RCForInst) { - // The subreg index on NewMI is essential; we still have to make sure - // DstReg:idx is in a class that NewMI can use. - MRI->constrainRegClass(DstReg, RCForInst); - } else { - // DstReg is actually incompatible with NewMI, we have to move to a - // super-reg's class. This could come from a sequence like: - // GR32 = MOV32r0 - // GR8 = COPY GR32:sub_8 - MRI->setRegClass(DstReg, CP.getNewRC()); - updateRegDefsUses(DstReg, DstReg, DstIdx); - NewMI->getOperand(0).setSubReg( - TRI->composeSubRegIndices(SrcIdx, DefMI->getOperand(0).getSubReg())); - } + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + MRI->setRegClass(DstReg, NewRC); + + updateRegDefsUses(DstReg, DstReg, DstIdx); + NewMI->getOperand(0).setSubReg(NewIdx); } else if (NewMI->getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. @@ -916,10 +909,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, DstInt->removeValNo(DeadVNI); // Find new undef uses. - for (MachineRegisterInfo::reg_nodbg_iterator - I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end(); - I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI->reg_nodbg_operands(DstInt->reg)) { if (MO.isDef() || MO.isUndef()) continue; MachineInstr *MI = MO.getParent(); @@ -944,8 +934,11 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); SmallPtrSet<MachineInstr*, 8> Visited; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); - MachineInstr *UseMI = I.skipInstruction();) { + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *UseMI = &*(I++); + // Each instruction can only be rewritten once because sub-register // composition is not always idempotent. 
When SrcReg != DstReg, rewriting // the UseMI operands removes them from the SrcReg use-def chain, but when @@ -956,7 +949,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, SmallVector<unsigned,8> Ops; bool Reads, Writes; - tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); // If SrcReg wasn't read, it may still be the case that DstReg is live-in // because SrcReg is a sub-register. diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 092ecdd..97817da 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -506,7 +506,13 @@ bool RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, DeadDef = LRQ.isDeadDef(); } } - if (!DeadDef) { + if (DeadDef) { + // LiveIntervals knows this is a dead even though it's MachineOperand is + // not flagged as such. Since this register will not be recorded as + // live-out, increase its PDiff value to avoid underflowing pressure. + if (PDiff) + PDiff->addPressureChange(Reg, false, MRI); + } else { if (LiveRegs.erase(Reg)) decreaseRegPressure(Reg); else @@ -876,9 +882,9 @@ static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, SlotIndex NextUseIdx, const MachineRegisterInfo *MRI, const LiveIntervals *LIS) { - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end(); - UI != UE; UI.skipInstruction()) { + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI->use_instr_nodbg_begin(Reg), + UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) { const MachineInstr* MI = &*UI; if (MI->isDebugValue()) continue; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 75ebdaa..bfd26dc 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -175,7 +175,7 @@ void RegScavenger::forward() { Tracking = true; } else { assert(MBBI != MBB->end() && "Already past the end of the basic block!"); - MBBI = llvm::next(MBBI); + MBBI = std::next(MBBI); } assert(MBBI != MBB->end() && "Already at the end of the basic block!"); @@ -415,7 +415,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, RC, TRI); - MachineBasicBlock::iterator II = prior(I); + MachineBasicBlock::iterator II = std::prev(I); unsigned FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); @@ -423,13 +423,13 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Restore the scavenged register before its use (or first terminator). TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, RC, TRI); - II = prior(UseMI); + II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } - Scavenged[SI].Restore = prior(UseMI); + Scavenged[SI].Restore = std::prev(UseMI); // Doing this here leads to infinite regress. // Scavenged[SI].Reg = SReg; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 75e3790..d08eb65 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -63,7 +63,7 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// not already. It also adds the current node as a successor of the /// specified node. 
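Several hunks in this file and below replace LLVM's llvm::prior()/llvm::next() helpers with the C++11 std::prev()/std::next() from <iterator>; the semantics are unchanged. A small sketch, with a std::list standing in for a basic block's instruction list:

    #include <iterator>
    #include <list>

    void example(std::list<int> &Insts, std::list<int>::iterator I) {
      auto Before = std::prev(I);      // was llvm::prior(I)
      auto TwoAfter = std::next(I, 2); // was llvm::next(llvm::next(I))
      (void)Before; (void)TwoAfter;
    }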
bool SUnit::addPred(const SDep &D, bool Required) { - // If this node already has this depenence, don't add a redundant one. + // If this node already has this dependence, don't add a redundant one. for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { // Zero-latency weak edges may be added purely for heuristic ordering. Don't @@ -301,8 +301,8 @@ void SUnit::biasCriticalPath() { SUnit::pred_iterator BestI = Preds.begin(); unsigned MaxDepth = BestI->getSUnit()->getDepth(); - for (SUnit::pred_iterator - I = llvm::next(BestI), E = Preds.end(); I != E; ++I) { + for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E; + ++I) { if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) BestI = I; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 7f1f9c4..c8328ad 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -44,13 +45,24 @@ static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI GAD construction")); +// FIXME: Enable the use of TBAA. There are two known issues preventing this: +// 1. Stack coloring does not update TBAA when merging allocas +// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations. +// Because BasicAA does not handle inttoptr, we'll often miss basic type +// punning idioms that we need to catch so we don't miscompile real-world +// code. +static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, + cl::init(false), cl::desc("Enable use of TBAA during MI GAD construction")); + ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt, bool IsPostRAFlag, + bool RemoveKillFlags, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -136,31 +148,32 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, if (!V) return; + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (!PSV->isAliased(MFI)) { + bool MayAlias = PSV->mayAlias(MFI); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); + } + return; + } + SmallVector<Value *, 4> Objs; getUnderlyingObjects(V, Objs); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { - bool MayAlias = true; V = *I; - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. 
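The new UseTBAA option and the existing EnableAASchedMI one follow LLVM's hidden command-line-option pattern: a cl::opt flag is typically reachable on tools that link the pass (such as llc) under the string name it registers, e.g. -enable-aa-sched-mi or -use-tbaa-in-sched-mi, even though cl::Hidden keeps it out of -help. A minimal sketch of that pattern with a hypothetical flag name, assuming the usual LLVM CommandLine header:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hypothetical option mirroring the hidden scheduler flags added above:
    // cl::Hidden hides it from -help, cl::init(false) sets the default, and the
    // value is read wherever the static variable is visible.
    static cl::opt<bool> ExampleSchedFlag(
        "example-sched-flag", cl::Hidden, cl::init(false),
        cl::desc("Illustrative hidden boolean option"));

    bool exampleFlagEnabled() { return ExampleSchedFlag; }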
- - if (PSV->isAliased(MFI)) { - Objects.clear(); - return; - } + assert(!isa<PseudoSourceValue>(V) && "Underlying value is a stack slot!"); - MayAlias = PSV->mayAlias(MFI); - } else if (!isIdentifiedObject(V)) { + if (!isIdentifiedObject(V)) { Objects.clear(); return; } - Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); } } @@ -284,8 +297,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { /// this SUnit to following instructions in the same scheduling region that /// depend the physical register referenced at OperIdx. void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - const MachineOperand &MO = MI->getOperand(OperIdx); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -323,6 +336,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + if (RemoveKillFlags) + MO.setIsKill(false); } else { addPhysRegDataDeps(SU, OperIdx); @@ -507,6 +522,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (MIa == MIb) return false; + // FIXME: Need to handle multiple memory operands to support all targets. + if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) + return true; + if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) return true; @@ -522,10 +541,6 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, MachineMemOperand *MMOa = *MIa->memoperands_begin(); MachineMemOperand *MMOb = *MIb->memoperands_begin(); - // FIXME: Need to handle multiple memory operands to support all targets. - if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) - llvm_unreachable("Multiple memory operands."); - // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -550,10 +565,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = AA->alias( - AliasAnalysis::Location(MMOa->getValue(), Overlapa, - MMOa->getTBAAInfo()), - AliasAnalysis::Location(MMOb->getValue(), Overlapb, - MMOb->getTBAAInfo())); + AliasAnalysis::Location(MMOa->getValue(), Overlapa, + UseTBAA ? MMOa->getTBAAInfo() : 0), + AliasAnalysis::Location(MMOb->getValue(), Overlapb, + UseTBAA ? MMOb->getTBAAInfo() : 0)); return (AAResult != AliasAnalysis::NoAlias); } @@ -687,10 +702,32 @@ void ScheduleDAGInstrs::initSUnits() { // Assign the Latency field of SU using target-provided information. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); + + // If this SUnit uses an unbuffered resource, mark it as such. + // These resources are used for in-order execution pipelines within an + // out-of-order core and are identified by BufferSize=1. BufferSize=0 is + // used for dispatch/issue groups and is not considered here. 
+ if (SchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SC = getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + case 0: + SU->hasReservedResource = true; + break; + case 1: + SU->isUnbuffered = true; + break; + default: + break; + } + } + } } } -/// If RegPressure is non null, compute register pressure as a side effect. The +/// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, @@ -720,7 +757,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // so that they can be given more precise dependencies. We track // separately the known memory locations that may alias and those // that are known not to alias - MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; + MapVector<const Value *, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs; MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; std::set<SUnit*> RejectMemNodes; @@ -747,7 +784,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MachineInstr *DbgMI = NULL; for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { - MachineInstr *MI = prior(MII); + MachineInstr *MI = std::prev(MII); if (MI && DbgMI) { DbgValues.push_back(std::make_pair(DbgMI, MI)); DbgMI = NULL; @@ -763,11 +800,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (RPTracker) { PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; RPTracker->recede(/*LiveUses=*/0, PDiff); - assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); + assert(RPTracker->getPos() == std::prev(MII) && + "RPTracker can't find MI"); } - assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && - "Cannot schedule terminators or labels!"); + assert( + (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && + "Cannot schedule terminators or labels!"); // Add register-based dependencies (data, anti, and output). bool HasVRegDef = false; @@ -815,9 +854,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (isGlobalMemoryObject(AA, MI)) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
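The switch above follows the convention stated in its comment: BufferSize == 0 identifies a dispatch/issue group (marked hasReservedResource) and BufferSize == 1 an in-order, unbuffered pipeline (marked isUnbuffered). A hypothetical classifier restating that convention; the real values come from the target's machine model:

    // Hypothetical helper; not part of the patch.
    enum class ResourceKind { ReservedGroup, UnbufferedPipe, Other };

    ResourceKind classifyBufferSize(int BufferSize) {
      if (BufferSize == 0)
        return ResourceKind::ReservedGroup;  // dispatch/issue group -> hasReservedResource
      if (BufferSize == 1)
        return ResourceKind::UnbufferedPipe; // in-order pipeline -> isUnbuffered
      return ResourceKind::Other;            // anything else is left unmarked here
    }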
- for (MapVector<const Value *, SUnit *>::iterator I = + for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { - I->second->addPred(SDep(SU, SDep::Barrier)); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + I->second[i]->addPred(SDep(SU, SDep::Barrier)); + } } for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { @@ -853,9 +894,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); - for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), - E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); + for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + } for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) @@ -887,19 +930,29 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is // an existing def. - MapVector<const Value *, SUnit *>::iterator I = + MapVector<const Value *, std::vector<SUnit *> >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<const Value *, SUnit *>::iterator IE = + MapVector<const Value *, std::vector<SUnit *> >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, - 0, true); - I->second = SU; + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, + 0, true); + + // If we're not using AA, then we only need one store per object. + if (!AAForDep) + I->second.clear(); + I->second.push_back(SU); } else { - if (ThisMayAlias) - AliasMemDefs[V] = SU; - else - NonAliasMemDefs[V] = SU; + if (ThisMayAlias) { + if (!AAForDep) + AliasMemDefs[V].clear(); + AliasMemDefs[V].push_back(SU); + } else { + if (!AAForDep) + NonAliasMemDefs[V].clear(); + NonAliasMemDefs[V].push_back(SU); + } } // Handle the uses in MemUses, if there are any. MapVector<const Value *, std::vector<SUnit *> >::iterator J = @@ -949,9 +1002,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (Objs.empty()) { // A load with no underlying object. Depend on all // potentially aliasing stores. - for (MapVector<const Value *, SUnit *>::iterator I = + for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], + RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -968,13 +1023,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = true; // A load from a specific PseudoSourceValue. Add precise dependencies. 
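The change from MapVector<const Value *, SUnit *> to MapVector<const Value *, std::vector<SUnit *> > lets the DAG builder remember every pending store to an underlying object when alias analysis is in use, instead of only the most recent one. A minimal sketch of that bookkeeping, with std::map and a stub SUnit standing in for the real types:

    #include <map>
    #include <vector>

    struct SUnitStub { unsigned NodeNum; };

    // With AA enabled, keep all stores so later memory ops can be checked
    // against each of them; without AA, one store per object suffices, so the
    // list is collapsed before appending (as in the hunk above).
    void recordStore(std::map<const void *, std::vector<SUnitStub *>> &MemDefs,
                     const void *UnderlyingObj, SUnitStub *SU, bool UsingAA) {
      std::vector<SUnitStub *> &Defs = MemDefs[UnderlyingObj];
      if (!UsingAA)
        Defs.clear();
      Defs.push_back(SU);
    }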
- MapVector<const Value *, SUnit *>::iterator I = + MapVector<const Value *, std::vector<SUnit *> >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<const Value *, SUnit *>::iterator IE = + MapVector<const Value *, std::vector<SUnit *> >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, - 0, true); + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, SU, I->second[i], + RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -999,6 +1055,145 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.clear(); } +/// \brief Initialize register live-range state for updating kills. +void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { + // Start with no live registers. + LiveRegs.reset(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (LiveRegs.test(MO.getReg())) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MIB.addReg(*SubRegs, RegState::ImplicitDefine); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +// FIXME: Reuse the LivePhysRegs utility for this. +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + + LiveRegs.resize(TRI->getNumRegs()); + BitVector killedRegs(TRI->getNumRegs()); + + startBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + // Repeat for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); + } + + // Examine all used registers and set/clear kill flag. 
When a + // register is used multiple times we only set the kill flag on + // the first use. Don't set kill flags on undef operands. + killedRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + bool kill = false; + if (!killedRegs.test(Reg)) { + kill = true; + // A register is not killed if any subregs are live... + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = !LiveRegs.test(Reg); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: toggleKillFlag may invalidate MO. + toggleKillFlag(MI, MO); + DEBUG(MI->dump()); + } + + killedRegs.set(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); @@ -1234,7 +1429,7 @@ public: const SDep *backtrack() { DFSStack.pop_back(); - return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second); + return DFSStack.empty() ? 
0 : std::prev(DFSStack.back().second); } const SUnit *getCurr() const { return DFSStack.back().first; } diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 8ddb3e8..f59c6cf 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk index 3f28e08..0e52ee3 100644 --- a/lib/CodeGen/SelectionDAG/Android.mk +++ b/lib/CodeGen/SelectionDAG/Android.mk @@ -41,6 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) # For the device # ===================================================== +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) include $(CLEAR_VARS) LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES) @@ -52,3 +53,4 @@ LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) include $(LLVM_GEN_INTRINSICS_MK) include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 43f72c5..cc0c5fa 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -50,11 +50,28 @@ STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt<bool> CombinerAA("combiner-alias-analysis", cl::Hidden, - cl::desc("Turn on alias analysis during testing")); + cl::desc("Enable DAG combiner alias-analysis heuristics")); static cl::opt<bool> CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, - cl::desc("Include global information in alias analysis")); + cl::desc("Enable DAG combiner's use of IR alias analysis")); + +// FIXME: Enable the use of TBAA. There are two known issues preventing this: +// 1. Stack coloring does not update TBAA when merging allocas +// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations. +// Because BasicAA does not handle inttoptr, we'll often miss basic type +// punning idioms that we need to catch so we don't miscompile real-world +// code. + static cl::opt<bool> + UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false), + cl::desc("Enable DAG combiner's use of TBAA")); + +#ifndef NDEBUG + static cl::opt<std::string> + CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); +#endif /// Hidden option to stress test load slicing, i.e., when this option /// is enabled, load slicing bypasses most of its profitability guards. 
@@ -212,6 +229,7 @@ namespace { SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); + SDValue visitRotate(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -257,11 +275,12 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); - SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); @@ -271,6 +290,11 @@ namespace { bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans = true); + + bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const; + bool isOneUseSetCC(SDValue N) const; + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -280,6 +304,10 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + SDLoc DL); SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -326,6 +354,14 @@ namespace { /// \return True if some memory operations were changed. bool MergeConsecutiveStores(StoreSDNode *N); + /// \brief Try to transform a truncation where C is a constant: + /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) + /// + /// \p N needs to be a truncation and its first operand an AND. Other + /// requirements are checked by the function (e.g. that trunc is + /// single-use) and if missed an empty SDValue is returned. + SDValue distributeTruncateThroughAnd(SDNode *N); + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), @@ -378,7 +414,7 @@ public: explicit WorkListRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { DC.removeFromWorkList(N); } }; @@ -566,79 +602,121 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } - // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc -// that selects between the values 1 and 0, making it equivalent to a setcc. -// Also, set the incoming LHS, RHS, and CC references to the appropriate -// nodes based on the type of node we are checking. This simplifies life a -// bit for the callers. -static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) { +// that selects between the target values used for true and false, making it +// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to +// the appropriate nodes based on the type of node we are checking. This +// simplifies life a bit for the callers. 
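The distributeTruncateThroughAnd helper declared above rests on the identity (trunc (and X, C)) == (and (trunc X), (trunc C)): truncation only discards high bits, so it distributes over the constant mask. A tiny worked check of the identity on concrete 32-to-16-bit values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u, C = 0x0000FF00u;
      uint16_t TruncOfAnd = static_cast<uint16_t>(X & C);                        // 0x5600
      uint16_t AndOfTrunc = static_cast<uint16_t>(X) & static_cast<uint16_t>(C); // 0x5600
      assert(TruncOfAnd == AndOfTrunc);
      return 0;
    }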
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(2); return true; } - if (N.getOpcode() == ISD::SELECT_CC && - N.getOperand(2).getOpcode() == ISD::Constant && - N.getOperand(3).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && - cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { - LHS = N.getOperand(0); - RHS = N.getOperand(1); - CC = N.getOperand(4); - return true; - } - return false; + + if (N.getOpcode() != ISD::SELECT_CC || + !TLI.isConstTrueVal(N.getOperand(2).getNode()) || + !TLI.isConstFalseVal(N.getOperand(3).getNode())) + return false; + + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; } // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only // one use. If this is true, it allows the users to invert the operation for // free when it is profitable to do so. -static bool isOneUseSetCC(SDValue N) { +bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) return true; return false; } +/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. +static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + +// \brief Returns the SDNode if it is a constant BuildVector or constant. +static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if(BV && BV->isConstant()) + return BV; + return NULL; +} + +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// int. +static ConstantSDNode *isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) + return BV->getConstantSplatValue(); + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); - if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { - if (isa<ConstantSDNode>(N1)) { - // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N0.getOperand(1)), - cast<ConstantSDNode>(N1)); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); - } - if (N0.hasOneUse()) { - // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N0.getOperand(0), N1); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); - } - } - - if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { - if (isa<ConstantSDNode>(N0)) { - // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, VT, - cast<ConstantSDNode>(N1.getOperand(1)), - cast<ConstantSDNode>(N0)); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); - } - if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, - N1.getOperand(0), N0); - AddToWorkList(OpNode.getNode()); - return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + if (N0.getOpcode() == Opc) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + } + if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) + return SDValue(); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + } + + if (N1.getOpcode() == Opc) { + if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); + if (!OpNode.getNode()) + return SDValue(); + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + } + if (N1.hasOneUse()) { + // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + if (!OpNode.getNode()) + return SDValue(); + AddToWorkList(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } } } @@ -1148,6 +1226,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SHL: return visitSHL(N); case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); + case ISD::ROTR: + case ISD::ROTL: return visitRotate(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1193,6 +1273,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); } return SDValue(); } @@ -1507,8 +1588,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ + if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + } } } @@ -1778,22 +1861,6 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. 
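ReassociateOps, rewritten above to also accept constant build vectors, applies the usual reassociation identity: pull the two constants together so they fold into one. A worked scalar instance of the same pattern:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 41, c1 = 7, c2 = 13;
      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)): constants fold to 20.
      assert((x + c1) + c2 == x + (c1 + c2));
      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) also uses commutativity.
      assert(c2 + (x + c1) == x + (c1 + c2));
      return 0;
    }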
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2229,7 +2296,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, bool HiExists = N->hasAnyUseOfValue(1); if (!HiExists && (!LegalOperations || - TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); @@ -2454,35 +2521,66 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // The type-legalizer generates this pattern when loading illegal // vector types from memory. In many cases this allows additional shuffle // optimizations. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N0.getOperand(1).getOpcode() == ISD::UNDEF && - N1.getOperand(1).getOpcode() == ISD::UNDEF) { + // There are other cases where moving the shuffle after the xor/and/or + // is profitable even if shuffles don't perform a swizzle. + // If both shuffles use the same mask, and both shuffles have the same first + // or second operand, then it might still be profitable to move the shuffle + // after the xor/and/or operation. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); - assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && "Inputs to shuffles are not the same type"); - - unsigned NumElts = VT.getVectorNumElements(); - + // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. - bool SameMask = true; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx0 = SVN0->getMaskElt(i); - int Idx1 = SVN1->getMaskElt(i); - if (Idx0 != Idx1) { - SameMask = false; - break; + // Check also that shuffles have only one use to avoid introducing extra + // instructions. + if (SVN0->hasOneUse() && SVN1->hasOneUse() && + SVN0->getMask().equals(SVN1->getMask())) { + SDValue ShOp = N0->getOperand(1); + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. 
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); } - } - if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, SDLoc(N), Op, - DAG.getUNDEF(VT), &SVN0->getMask()[0]); + // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) + // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) + // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(0), N1->getOperand(0)); + AddToWorkList(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, + &SVN0->getMask()[0]); + } + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0->getOperand(0); + if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, VT); + else + ShOp = SDValue(); + } + + // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) + // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) + // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) + if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(1), N1->getOperand(1)); + AddToWorkList(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, + &SVN0->getMask()[0]); + } } } @@ -3151,6 +3249,60 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return N0; if (ISD::isBuildVectorAllOnes(N1.getNode())) return N1; + + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // Do this only if the resulting shuffle is legal. + if (isa<ShuffleVectorSDNode>(N0) && + isa<ShuffleVectorSDNode>(N1) && + N0->getOperand(1) == N1->getOperand(1) && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { + bool CanFold = true; + unsigned NumElts = VT.getVectorNumElements(); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + // We construct two shuffle masks: + // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand + // and N1 as the second operand. + // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand + // and N0 as the second operand. + // We do this because OR is commutable and therefore there might be + // two ways to fold this node into a shuffle. + SmallVector<int,4> Mask1; + SmallVector<int,4> Mask2; + + for (unsigned i = 0; i != NumElts && CanFold; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Both shuffle indexes are undef. Propagate Undef. + if (M0 < 0 && M1 < 0) { + Mask1.push_back(M0); + Mask2.push_back(M0); + continue; + } + + if (M0 < 0 || M1 < 0 || + (M0 < (int)NumElts && M1 < (int)NumElts) || + (M0 >= (int)NumElts && M1 >= (int)NumElts)) { + CanFold = false; + break; + } + + Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); + Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); + } + + if (CanFold) { + // Fold this sequence only if the resulting shuffle is 'legal'. 
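The folds annotated above, e.g. (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C), depend on both shuffles using the same mask: each output lane then draws from the same source index on both sides, so the bitwise operation can be done before the shuffle. A per-lane check on small hand-rolled 4-wide vectors, where mask indices >= 4 select from the second operand:

    #include <array>
    #include <cassert>
    #include <cstdint>

    using V4 = std::array<uint32_t, 4>;

    static V4 shuffle(const V4 &X, const V4 &Y, const std::array<int, 4> &M) {
      V4 R{};
      for (int i = 0; i < 4; ++i)
        R[i] = M[i] < 4 ? X[M[i]] : Y[M[i] - 4];
      return R;
    }

    static V4 andv(const V4 &X, const V4 &Y) {
      return {X[0] & Y[0], X[1] & Y[1], X[2] & Y[2], X[3] & Y[3]};
    }

    int main() {
      V4 A{1, 2, 3, 4}, B{5, 6, 7, 8}, C{0xF0, 0x0F, 0xFF, 0x00};
      std::array<int, 4> M{0, 5, 2, 7}; // mixes lanes from both operands
      // (and (shuffle A, C, M), (shuffle B, C, M)) == shuffle (and A, B), C, M
      assert(andv(shuffle(A, C, M), shuffle(B, C, M)) == shuffle(andv(A, B), C, M));
      return 0;
    }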
+ if (TLI.isShuffleMaskLegal(Mask1, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), + N1->getOperand(0), &Mask1[0]); + if (TLI.isShuffleMaskLegal(Mask2, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), + N0->getOperand(0), &Mask2[0]); + } + } } // fold (or x, undef) -> -1 @@ -3192,11 +3344,14 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { + SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); + if (!COR.getNode()) + return SDValue(); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + N0.getOperand(0), N1), COR); + } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -3302,6 +3457,155 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { return false; } +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: +// +// (or (shift1 X, Neg), (shift2 X, Pos)) +// +// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate +// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// to consider shift amounts with defined behavior. +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { + // If OpSize is a power of 2 then: + // + // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) + // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // + // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // for the stronger condition: + // + // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == OpSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == OpSize). + // + // We could actually use [A] whenever OpSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. for + // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + unsigned MaskLoBits = 0; + if (Neg.getOpcode() == ISD::AND && + isPowerOf2_64(OpSize) && + Neg.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(OpSize); + } + + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 
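matchRotateSub, introduced above, proves that Neg behaves like OpSize - Pos (possibly modulo OpSize when masked with OpSize - 1), which is exactly what makes an or-of-opposing-shifts a rotate. For OpSize = 32 the masked form is the familiar well-defined rotate idiom; a concrete check:

    #include <cassert>
    #include <cstdint>

    // With Neg = (32 - Pos) & 31, the or-of-shifts is a left rotate for every
    // Pos in [0, 32), including Pos == 0, where the unmasked "32 - Pos" would
    // be an out-of-range (undefined) shift amount.
    static uint32_t rotl32(uint32_t X, unsigned Pos) {
      return (X << (Pos & 31)) | (X >> ((32 - Pos) & 31));
    }

    int main() {
      assert(rotl32(0x80000001u, 1) == 0x00000003u);
      assert(rotl32(0xDEADBEEFu, 0) == 0xDEADBEEFu);
      assert(rotl32(0x00000001u, 31) == 0x80000000u);
      return 0;
    }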
+ if (Neg.getOpcode() != ISD::SUB) + return 0; + ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + if (!NegC) + return 0; + SDValue NegOp1 = Neg.getOperand(1); + + // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // Pos'. The truncation is redundant for the purpose of the equality. + if (MaskLoBits && + Pos.getOpcode() == ISD::AND && + Pos.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) + Pos = Pos.getOperand(0); + + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // OpSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; + if (Pos == NegOp1) + Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. + // Then the condition we want to prove becomes: + // + // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (OpSize - PosC) & Mask + // OpSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && + Pos.getOperand(0) == NegOp1 && + Pos.getOperand(1).getOpcode() == ISD::Constant) + Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + + NegC->getAPIntValue()); + else + return false; + + // Now we just need to check that OpSize & Mask == Width & Mask. + if (MaskLoBits) + // Opsize & Mask is 0 since Mask is Opsize - 1. + return Width.getLoBits(MaskLoBits) == 0; + return Width == OpSize; +} + +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the +// former being preferred if supported. InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, SDLoc DL) { + // fold (or (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) or (rotr x, (sub 32, y)) + // + // fold (or (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> + // (rotr x, y) or (rotl x, (sub 32, y)) + EVT VT = Shifted.getValueType(); + if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, + HasPos ? Pos : Neg).getNode(); + } + + // fold (or (shl (*ext x), (*ext y)), + // (srl (*ext x), (*ext (sub 32, y)))) -> + // (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y))) + // + // fold (or (shl (*ext x), (*ext (sub 32, y))), + // (srl (*ext x), (*ext y))) -> + // (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y))) + if (Shifted.getOpcode() == ISD::ZERO_EXTEND || + Shifted.getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerShifted = Shifted.getOperand(0); + EVT InnerVT = InnerShifted.getValueType(); + bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT); + if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) { + if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) { + SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL, + InnerVT, InnerShifted, HasPosInner ? 
Pos : Neg); + return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode(); + } + } + } + + return 0; +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. @@ -3342,6 +3646,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { unsigned OpSizeInBits = VT.getSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) @@ -3395,28 +3700,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } + SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; return 0; } @@ -3559,7 +3851,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// visitShiftByConstant - Handle transforms common to the three shifts, when /// the shift amount is a constant. -SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { + // We can't and shouldn't fold opaque constants. + if (Amt->isOpaque()) + return SDValue(); + SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -3585,9 +3881,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { break; } - // We require the RHS of the binop to be a constant as well. + // We require the RHS of the binop to be a constant and not opaque as well. ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); - if (!BinOpCst) return SDValue(); + if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -3617,6 +3913,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); + assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); // Create the new shift. 
SDValue NewShift = DAG.getNode(N->getOpcode(), @@ -3627,18 +3924,74 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } +SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { + assert(N->getOpcode() == ISD::TRUNCATE); + assert(N->getOperand(0).getOpcode() == ISD::AND); + + // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) + if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { + SDValue N01 = N->getOperand(0).getOperand(1); + + if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + + return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, + DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), + DAG.getConstant(TruncC, TruncVT)); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitRotate(SDNode *N) { + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). + if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && + N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); + if (NewOp1.getNode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), NewOp1); + } + return SDValue(); +} + SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); + // If setcc produces all-one true value then: + // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) + if (N1CV && N1CV->isConstant()) { + if (N0.getOpcode() == ISD::AND && + TLI.getBooleanContents(true) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); + + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) { + SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); + if (C.getNode()) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + } + } else { + N1C = isConstOrConstSplat(N1); + } + } } // fold (shl c1, c2) -> c1<<c2 @@ -3662,35 +4015,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getConstant(0, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
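The MatchRotatePosNeg/matchRotateSub hunks above hinge on one arithmetic fact: when the two shift amounts sum to the operand width (possibly modulo that width, which is what the masked "Pos' & (OpSize - 1)" form captures), the OR of the opposing shifts is a rotate. A minimal standalone C++ sketch of that identity, using illustrative 32-bit values rather than any LLVM API:

    #include <cassert>
    #include <cstdint>

    // Rotate-left by R bits; R is reduced modulo 32 to avoid an undefined shift.
    static uint32_t rotl32(uint32_t X, unsigned R) {
      R &= 31;
      return R ? (X << R) | (X >> (32 - R)) : X;
    }

    int main() {
      uint32_t X = 0xDEADBEEFu;
      for (unsigned Pos = 0; Pos < 32; ++Pos) {
        // Neg == (32 - Pos) & 31, the masked form matchRotateSub reasons about.
        unsigned Neg = (32 - Pos) & 31;
        assert(((X << Pos) | (X >> Neg)) == rotl32(X, Pos));
      }
      return 0;
    }

Whether the rotl or the rotr form is emitted then only depends on which opcode the target declares legal, which is what the HasPos checks in the hunk select.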
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SHL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) @@ -3701,20 +4044,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) && - N0.getOperand(0).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); - if (c2 >= OpSizeInBits - InnerShiftSize) { - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, - N0.getOperand(0)->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + N0.getOperand(0).getOpcode() == ISD::SHL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0Op0.getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + N0Op0->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } } @@ -3722,19 +4066,20 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Only fold this if the inner zext has no other uses to avoid increasing // the total number of instructions. 
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - if (c1 == c2) { - SDValue NewOp0 = N0.getOperand(0); - EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); - AddToWorkList(NewSHL.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + N0.getOperand(0).getOpcode() == ISD::SRL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + if (c1 < VT.getScalarSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } } } } @@ -3743,40 +4088,39 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding // this will increase the total number of instructions. - if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if (c1 < VT.getSizeInBits()) { - uint64_t c2 = N1C->getZExtValue(); - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - c1); - SDValue Shift; - if (c2 > c1) { - Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2-c1, N1.getValueType())); - } else { - Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1-c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + if (c1 < OpSizeInBits) { + uint64_t c2 = N1C->getZExtValue(); + APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2 - c1); + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, N1.getValueType())); + } else { + Mask = Mask.lshr(c1 - c2); + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, N1.getValueType())); + } + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, + DAG.getConstant(Mask, VT)); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + unsigned BitSize = VT.getScalarSizeInBits(); SDValue HiBitsMask = - DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), - VT.getSizeInBits() - - N1C->getZExtValue()), - VT); + DAG.getConstant(APInt::getHighBitsSet(BitSize, + BitSize - N1C->getZExtValue()), VT); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } if (N1C) { - SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSHL = visitShiftByConstant(N, N1C); if 
(NewSHL.getNode()) return NewSHL; } @@ -3796,6 +4140,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> (sra c1, c2) @@ -3829,11 +4175,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; + if (Sum >= OpSizeInBits) + Sum = OpSizeInBits - 1; return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1C->getValueType(0))); + DAG.getConstant(Sum, N1.getValueType())); } } @@ -3842,14 +4189,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // result_size - n != m. // If truncate is free for the target sext(shl) is likely to result in better // code. - if (N0.getOpcode() == ISD::SHL) { + if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constanst of the shifts, CN0 = m, CN = n. - const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (N01C && N1C) { + const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); + if (N01C) { + LLVMContext &Ctx = *DAG.getContext(); // Determine what the truncate's result bitsize and type would be. - EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), - OpSizeInBits - N1C->getZExtValue()); + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); + + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3876,44 +4226,33 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } - } - - // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + } + + // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) // if c1 is equal to the number of bits the trunc removes if (N0.getOpcode() == ISD::TRUNCATE && (N0.getOperand(0).getOpcode() == ISD::SRL || N0.getOperand(0).getOpcode() == ISD::SRA) && N0.getOperand(0).hasOneUse() && N0.getOperand(0).getOperand(1).hasOneUse() && - N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { - EVT LargeVT = N0.getOperand(0).getValueType(); - ConstantSDNode *LargeShiftAmt = - cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); - - if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == - LargeShiftAmt->getZExtValue()) { - SDValue Amt = - DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), - getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, - N0.getOperand(0).getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + N1C) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { + unsigned LargeShiftVal = LargeShift->getZExtValue(); + EVT LargeVT = N0Op0.getValueType(); + + if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDValue Amt = + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + getShiftAmountTy(N0Op0.getOperand(0).getValueType())); + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + N0Op0.getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + } } } @@ -3927,7 +4266,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { - SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; } @@ -3947,6 +4286,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 @@ -3967,14 +4308,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getConstant(0, VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRL && - N0.getOperand(1).getOpcode() == ISD::Constant) { - uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + if (N1C && N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *N01C = 
isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N01C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, N1.getValueType())); + } } // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) @@ -3999,18 +4341,21 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl (shl x, c), c) -> (and x, cst2) - if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && - N0.getValueSizeInBits() <= 64) { - uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { + unsigned BitSize = N0.getScalarValueSizeInBits(); + if (BitSize <= 64) { + uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? EVT SmallVT = N0.getOperand(0).getValueType(); - if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) + unsigned BitSize = SmallVT.getScalarSizeInBits(); + if (N1C->getZExtValue() >= BitSize) return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { @@ -4019,7 +4364,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), DAG.getConstant(Mask, VT)); @@ -4028,14 +4373,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign // bit, which is unmodified by sra. - if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { + if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { if (N0.getOpcode() == ISD::SRA) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && - N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { + N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { APInt KnownZero, KnownOne; DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); @@ -4070,22 +4415,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE && - N1.getOperand(0).getOpcode() == ISD::AND && - N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { - SDValue N101 = N1.getOperand(0).getOperand(1); - if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { - EVT TruncVT = N1.getValueType(); - SDValue N100 = N1.getOperand(0).getOperand(0); - APInt TruncC = N101C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getNode(ISD::AND, SDLoc(N), - TruncVT, - DAG.getNode(ISD::TRUNCATE, - SDLoc(N), - TruncVT, N100), - DAG.getConstant(TruncC, TruncVT))); - } + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } // fold operands of srl based on knowledge that the low bits are not @@ -4094,7 +4427,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(N, 0); if (N1C) { - SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); + SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; } @@ -4275,12 +4608,12 @@ static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the inputs. SDValue Lo, Hi, LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -4338,9 +4671,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return SDValue(); SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; - llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); @@ -4353,6 +4686,13 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } + // Fold (vselect (build_vector all_ones), N1, N2) -> N1 + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N2; + return SDValue(); } @@ -4402,6 +4742,65 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } +// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext +// dag node into a ConstantSDNode or a build_vector of constants. +// This function is called by the DAGCombiner when visiting sext/zext/aext +// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +// Vector extends are not folded if operations are legal; this is to +// avoid introducing illegal build_vector dag nodes. 
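tryToFoldExtendOfConstant, defined just below, performs the extension element by element with a shift-up/shift-down pair on the constant's bits. Roughly what that arithmetic does for one element, sketched standalone on plain 32-bit integers (VTBits/EVTBits are illustrative widths; the sign case assumes an arithmetic right shift, which APInt::ashr provides):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned VTBits = 32, EVTBits = 8, ShAmt = VTBits - EVTBits;
      uint32_t C = 0xF0;  // an 8-bit constant whose sign bit is set

      // sext: shift the narrow value to the top, then shift back arithmetically.
      uint32_t SExt = (uint32_t)((int32_t)(C << ShAmt) >> ShAmt);
      // zext: same, but shift back logically so the high bits stay zero.
      uint32_t ZExt = (C << ShAmt) >> ShAmt;

      assert(SExt == 0xFFFFFFF0u);
      assert(ZExt == 0x000000F0u);
      return 0;
    }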
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, bool LegalTypes, + bool LegalOperations) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + + // fold (sext c1) -> c1 + // fold (zext c1) -> c1 + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); + + // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) + EVT SVT = VT.getScalarType(); + if (!(VT.isVector() && + (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) + return 0; + + // We can fold this node into a build_vector. + unsigned VTBits = SVT.getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + unsigned ShAmt = VTBits - EVTBits; + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + SDLoc DL(N); + + for (unsigned i=0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(DAG.getUNDEF(SVT)); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + if (Opcode == ISD::SIGN_EXTEND) + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + SVT)); + else + Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), + SVT)); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode(); +} + // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. 
Returns true if extension are possible and the above @@ -4492,9 +4891,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (sext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) @@ -4671,7 +5070,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); @@ -4680,15 +5079,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && - (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { - return DAG.getSelect(SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), - getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + + if (!VT.isVector()) { + EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); + if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + SDLoc DL(N); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + SDValue SetCC = DAG.getSetCC(DL, + SetCCVT, + N0.getOperand(0), N0.getOperand(1), CC); + EVT SelectVT = getSetCCResultType(VT); + return DAG.getSelect(DL, VT, + DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + NegOne, DAG.getConstant(0, VT)); + + } } } @@ -4742,9 +5147,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (zext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) @@ -4925,10 +5331,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { - if (!LegalOperations && VT.isVector()) { + if (!LegalOperations && VT.isVector() && + N0.getValueType().getVectorElementType() == MVT::i1) { + EVT N0VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N0VT) == N0.getValueType()) + return SDValue(); + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. 
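The sext(setcc) rewrite above rests on the observation that sign-extending a 1-bit boolean yields all-ones for true and zero for false, i.e. exactly select(cond, -1, 0). A standalone sketch, with a plain C++ bool standing in for the i1 setcc result:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int A = -2; A <= 2; ++A)
        for (int B = -2; B <= 2; ++B) {
          bool SetCC = (A < B);                // the i1 comparison result
          int32_t Sext = -(int32_t)SetCC;      // sign-extend i1 -> i32
          int32_t Select = SetCC ? -1 : 0;     // select(setcc, -1, 0)
          assert(Sext == Select);
        }
      return 0;
    }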
- EVT N0VT = N0.getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), DAG.getConstant(1, EltVT)); @@ -5007,9 +5417,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // fold (aext c1) -> c1 - if (isa<ConstantSDNode>(N0)) - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) @@ -5466,6 +5877,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { BSwap, N1); } + // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs + // into a build_vector. + if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SmallVector<SDValue, 8> Elts; + unsigned NumElts = N0->getNumOperands(); + unsigned ShAmt = VTBits - EVTBits; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(Op); + continue; + } + + ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); + const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), + Op.getValueType())); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts); + } + return SDValue(); } @@ -5510,7 +5944,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - LegalTypes && !LegalOperations && N0->hasOneUse()) { + LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); @@ -5587,6 +6021,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Reduced = ReduceLoadWidth(N); if (Reduced.getNode()) return Reduced; + // Handle the case where the load remains an extending load even + // after truncation. + if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (!LN0->isVolatile() && + LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); + return NewLoad; + } + } } // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. 
@@ -5654,8 +6102,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || - LD1->getPointerInfo().getAddrSpace() != - LD2->getPointerInfo().getAddrSpace()) + LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -5691,14 +6138,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (!LegalTypes && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && VT.isVector()) { - bool isSimple = true; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) - if (N0.getOperand(i).getOpcode() != ISD::UNDEF && - N0.getOperand(i).getOpcode() != ISD::Constant && - N0.getOperand(i).getOpcode() != ISD::ConstantFP) { - isSimple = false; - break; - } + bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && @@ -6551,7 +6991,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. + // The next optimizations are desirable only if SELECT_CC can be lowered. // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is no way to mark an opcode illegal at all value types @@ -6608,7 +7048,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desireable only if SELECT_CC can be lowered. + // The next optimizations are desirable only if SELECT_CC can be lowered. // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is no way to mark an opcode illegal at all value types @@ -7537,7 +7977,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7888,14 +8333,6 @@ struct LoadedSlice { }; } -/// \brief Sorts LoadedSlice according to their offset. -struct LoadedSliceSorter { - bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { - assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); - return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); - } -}; - /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { @@ -7939,7 +8376,11 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. 
- std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + std::sort(LoadedSlices.begin(), LoadedSlices.end(), + [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + }); const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); // First (resp. Second) is the first (resp. Second) potentially candidate // to be placed in a paired load. @@ -8075,8 +8516,8 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. - // Moreover, if we shifted with a non 8-bits multiple, the slice - // will be accross several bytes. We do not support that. + // Moreover, if we shifted with a non-8-bits multiple, the slice + // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) return 0; @@ -8543,14 +8984,6 @@ struct MemOpLink { unsigned SequenceNum; }; -/// Sorts store nodes in a link according to their offset from a shared -// base ptr. -struct ConsecutiveMemoryChainSorter { - bool operator()(MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - } -}; - bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; @@ -8669,7 +9102,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Sort the memory operands according to their distance from the base pointer. std::sort(StoreNodes.begin(), StoreNodes.end(), - ConsecutiveMemoryChainSorter()); + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase || + (LHS.OffsetFromBase == RHS.OffsetFromBase && + LHS.SequenceNum > RHS.SequenceNum); + }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. @@ -8717,7 +9154,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { NonZero |= !C->getConstantFPValue()->isNullValue(); } else { - // Non constant. + // Non-constant. break; } @@ -9125,7 +9562,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); - if (UseAA) { +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -9306,9 +9748,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector - // without using extract_subvector. + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. 
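The two sorting hunks above fold the old comparator structs into lambdas; the store-merging one also gains a sequence-number tie-break so that, for equal offsets, later stores sort first. A runnable sketch of that ordering, with a stand-in MemOpLink holding only the two fields the comparator reads:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct MemOpLink { int64_t OffsetFromBase; unsigned SequenceNum; };

    int main() {
      std::vector<MemOpLink> Stores = {{8, 0}, {0, 2}, {0, 1}, {16, 3}};
      std::sort(Stores.begin(), Stores.end(),
                [](const MemOpLink &LHS, const MemOpLink &RHS) {
                  // Primary key: distance from the shared base pointer.
                  // Tie-break: higher SequenceNum (a later store) comes first.
                  return LHS.OffsetFromBase < RHS.OffsetFromBase ||
                         (LHS.OffsetFromBase == RHS.OffsetFromBase &&
                          LHS.SequenceNum > RHS.SequenceNum);
                });
      assert(Stores[0].OffsetFromBase == 0 && Stores[0].SequenceNum == 2);
      assert(Stores[2].OffsetFromBase == 8 && Stores[3].OffsetFromBase == 16);
      return 0;
    }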
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo && !LegalOperations) { + && ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); @@ -9320,16 +9763,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getUNDEF(NVT); // Select the right vector half to extract from. + SDValue SVInVec; if (OrigElt < NumElem) { - InVec = InVec->getOperand(0); + SVInVec = InVec->getOperand(0); } else { - InVec = InVec->getOperand(1); + SVInVec = InVec->getOperand(1); OrigElt -= NumElem; } - EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - InVec, DAG.getConstant(OrigElt, IndexTy)); + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. + + if (!LegalOperations) { + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, + SVInVec, DAG.getConstant(OrigElt, IndexTy)); + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -9836,6 +10295,26 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) + // -> (BUILD_VECTOR A, B, ..., C, D, ...) + if (N->getNumOperands() == 2 && + N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SmallVector<SDValue, 8> Opnds; + unsigned BuildVecNumElts = N0.getNumOperands(); + + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], + Opnds.size()); + } + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -10142,6 +10621,33 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N2 = N->getOperand(2); + + // If the input vector is a concatenation, and the insert replaces + // one of the halves, we can optimize into a single concat_vectors. 
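The visitINSERT_SUBVECTOR fold that follows rewrites an insert into either half of a two-operand concatenation as a fresh concat. The shape of the transform, modelled on plain arrays with illustrative values:

    #include <algorithm>
    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 4> X = {1, 2, 3, 4}, Y = {5, 6, 7, 8}, Z = {9, 9, 9, 9};

      // concat_vectors(X, Y)
      std::array<int, 8> Concat;
      std::copy(X.begin(), X.end(), Concat.begin());
      std::copy(Y.begin(), Y.end(), Concat.begin() + 4);

      // insert_subvector(Concat, Z, 0) replaces the lower half: concat(Z, Y).
      std::array<int, 8> Lower = Concat;
      std::copy(Z.begin(), Z.end(), Lower.begin());
      assert(Lower[0] == 9 && Lower[4] == 5);

      // insert_subvector(Concat, Z, 4) replaces the upper half: concat(X, Z).
      std::array<int, 8> Upper = Concat;
      std::copy(Z.begin(), Z.end(), Upper.begin() + 4);
      assert(Upper[0] == 1 && Upper[4] == 9);
      return 0;
    }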
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && + N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); + EVT VT = N->getValueType(0); + + // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors Z, Y) + if (InsIdx == 0) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N->getOperand(1), N0.getOperand(1)); + + // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors X, Z) + if (InsIdx == VT.getVectorNumElements()/2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), N->getOperand(1)); + } + + return SDValue(); +} + /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -10204,18 +10710,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // this operation. if (LHS.getOpcode() == ISD::BUILD_VECTOR && RHS.getOpcode() == ISD::BUILD_VECTOR) { + // Check if both vectors are constants. If not bail out. + if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && + cast<BuildVectorSDNode>(RHS)->isConstant())) + return SDValue(); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { SDValue LHSOp = LHS.getOperand(i); SDValue RHSOp = RHS.getOperand(i); - // If these two elements can't be folded, bail out. - if ((LHSOp.getOpcode() != ISD::UNDEF && - LHSOp.getOpcode() != ISD::Constant && - LHSOp.getOpcode() != ISD::ConstantFP) || - (RHSOp.getOpcode() != ISD::UNDEF && - RHSOp.getOpcode() != ISD::Constant && - RHSOp.getOpcode() != ISD::ConstantFP)) - break; // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || @@ -10862,14 +11365,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif if (UseAA && SrcValue1 && SrcValue2) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), - AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); + AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, + UseTBAA ? TBAAInfo1 : 0), + AliasAnalysis::Location(SrcValue2, Overlap2, + UseTBAA ? TBAAInfo2 : 0)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -10956,7 +11466,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, if (Depth > 6 || Aliases.size() == 2) { Aliases.clear(); Aliases.push_back(OriginalChain); - break; + return; } // Don't bother if we've been before. @@ -11018,6 +11528,63 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } + + // We need to be careful here to also search for aliases through the + // value operand of a store, etc. Consider the following situation: + // Token1 = ... 
+ // L1 = load Token1, %52 + // S1 = store Token1, L1, %51 + // L2 = load Token1, %52+8 + // S2 = store Token1, L2, %51+8 + // Token2 = Token(S1, S2) + // L3 = load Token2, %53 + // S3 = store Token2, L3, %52 + // L4 = load Token2, %53+8 + // S4 = store Token2, L4, %52+8 + // If we search for aliases of S3 (which loads address %52), and we look + // only through the chain, then we'll miss the trivial dependence on L1 + // (which also loads from %52). We then might change all loads and + // stores to use Token1 as their chain operand, which could result in + // copying %53 into %52 before copying %52 into %51 (which should + // happen first). + // + // The problem is, however, that searching for such data dependencies + // can become expensive, and the cost is not directly related to the + // chain depth. Instead, we'll rule out such configurations here by + // insisting that we've visited all chain users (except for users + // of the original chain, which is not necessary). When doing this, + // we need to look through nodes we don't care about (otherwise, things + // like register copies will interfere with trivial cases). + + SmallVector<const SDNode *, 16> Worklist; + for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(), + IE = Visited.end(); I != IE; ++I) + if (*I != OriginalChain.getNode()) + Worklist.push_back(*I); + + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + + // We have already visited M, and want to make sure we've visited any uses + // of M that we care about. For uses that we've not visisted, and don't + // care about, queue them to the worklist. + + for (SDNode::use_iterator UI = M->use_begin(), + UIE = M->use_end(); UI != UIE; ++UI) + if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) { + // We've not visited this use, and we care about it (it could have an + // ordering dependency with the original node). + Aliases.clear(); + Aliases.push_back(OriginalChain); + return; + } + + // We've not visited this use, but we don't care about it. Mark it as + // visited and enqueue it to the worklist. + Worklist.push_back(*UI); + } + } } /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index a6f7461..baba51e 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -49,8 +49,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" @@ -118,7 +118,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { // No-op casts are trivially coalesced by fast-isel. 
if (const CastInst *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; @@ -133,7 +133,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { !(I->getOpcode() == Instruction::BitCast || I->getOpcode() == Instruction::PtrToInt || I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); + cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { @@ -192,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. Reg = - getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) { Reg = TargetMaterializeFloatZero(CF); @@ -229,7 +229,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } @@ -335,20 +335,20 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I, FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; - DebugLoc OldDL = DL; + DebugLoc OldDL = DbgLoc; recomputeInsertPt(); - DL = DebugLoc(); + DbgLoc = DebugLoc(); SavePoint SP = { OldInsertPt, OldDL }; return SP; } void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) - LastLocalValue = llvm::prior(FuncInfo.InsertPt); + LastLocalValue = std::prev(FuncInfo.InsertPt); // Restore the previous insert position. 
FuncInfo.InsertPt = OldInsertPt.InsertPt; - DL = OldInsertPt.DL; + DbgLoc = OldInsertPt.DL; } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -484,7 +484,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -503,7 +503,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset TotalOffs += - TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) @@ -524,7 +524,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { } // N = N + Idx * ElementSize; - uint64_t ElementSize = TD.getTypeAllocSize(Ty); + uint64_t ElementSize = DL.getTypeAllocSize(Ty); std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; @@ -572,7 +572,7 @@ bool FastISel::SelectCall(const User *I) { if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) .addExternalSymbol(IA->getAsmString().c_str()) .addImm(ExtraInfo); @@ -643,11 +643,11 @@ bool FastISel::SelectCall(const User *I) { if (Op) { if (Op->isReg()) { Op->setIsDebug(true); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, DI->getVariable()); } else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(0) @@ -667,26 +667,26 @@ bool FastISel::SelectCall(const User *I) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. 
bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, DI->getOffset(), DI->getVariable()); } else { // We can't yet handle anything else here because it would require @@ -798,8 +798,8 @@ bool FastISel::SelectBitCast(const User *I) { // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Op0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); } } @@ -822,7 +822,7 @@ FastISel::SelectInstruction(const Instruction *I) { if (!HandlePHINodesInSuccessorBlocks(I->getParent())) return false; - DL = I->getDebugLoc(); + DbgLoc = I->getDebugLoc(); MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; @@ -840,7 +840,7 @@ FastISel::SelectInstruction(const Instruction *I) { // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Remove dead code. However, ignore call instructions since we've flushed @@ -855,7 +855,7 @@ FastISel::SelectInstruction(const Instruction *I) { SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { ++NumFastIselSuccessTarget; - DL = DebugLoc(); + DbgLoc = DebugLoc(); return true; } // Check for dead code and remove as necessary. @@ -863,7 +863,7 @@ FastISel::SelectInstruction(const Instruction *I) { if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); - DL = DebugLoc(); + DbgLoc = DebugLoc(); return false; } @@ -871,7 +871,7 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { @@ -881,7 +881,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { } else { // The unconditional branch case. 
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, - SmallVector<MachineOperand, 0>(), DL); + SmallVector<MachineOperand, 0>(), DbgLoc); } FuncInfo.MBB->addSuccessor(MSucc); } @@ -1096,7 +1096,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), TM(FuncInfo.MF->getTarget()), - TD(*TM.getDataLayout()), + DL(*TM.getDataLayout()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), TRI(*TM.getRegisterInfo()), @@ -1209,7 +1209,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); return ResultReg; } @@ -1220,13 +1220,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1240,15 +1240,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1262,17 +1262,17 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1285,15 +1285,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1306,17 +1306,17 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm1) .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1329,15 +1329,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1351,17 +1351,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1375,17 +1375,17 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, 
Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1397,11 +1397,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1413,12 +1413,12 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addImm(Imm1).addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1432,7 +1432,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) + DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1498,9 +1498,9 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. - DL = PN->getDebugLoc(); + DbgLoc = PN->getDebugLoc(); if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) - DL = Inst->getDebugLoc(); + DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { @@ -1508,7 +1508,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - DL = DebugLoc(); + DbgLoc = DebugLoc(); } } @@ -1523,7 +1523,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // this by scanning the single-use users of the load until we get to FoldInst. unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - const Instruction *TheUser = LI->use_back(); + const Instruction *TheUser = LI->user_back(); while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. 
TheUser->getParent() == FoldInst->getParent() && @@ -1532,7 +1532,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { if (!TheUser->hasOneUse()) return false; - TheUser = TheUser->use_back(); + TheUser = TheUser->user_back(); } // If we didn't find the fold instruction, then we failed to collapse the @@ -1559,7 +1559,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { return false; MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); - MachineInstr *User = &*RI; + MachineInstr *User = RI->getParent(); // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes; make @@ -1576,8 +1576,8 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { if (!isa<AddOperator>(Add)) return false; // Type size needs to match. - if (TD.getTypeSizeInBits(GEP->getType()) != - TD.getTypeSizeInBits(Add->getType())) + if (DL.getTypeSizeInBits(GEP->getType()) != + DL.getTypeSizeInBits(Add->getType())) return false; // Must be in the same basic block. if (isa<Instruction>(Add) && diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4309dc1..5f0006e 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -32,6 +32,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -46,16 +47,15 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; const BasicBlock *BB = I->getParent(); - for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : I->users()) if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) return true; - } + return false; } -void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, + SelectionDAG *DAG) { const TargetLowering *TLI = TM.getTargetLowering(); Fn = &fn; @@ -74,7 +74,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + // Don't fold inalloca allocas or other dynamic allocas into the initial + // stack frame allocation, even if they are in the entry block. 
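The isStaticAlloca() test that the next hunk leans on separates allocas that can become fixed frame slots from everything else. A minimal stand-alone sketch of roughly what that distinction covers (the struct and field names are stand-ins for illustration, not LLVM's AllocaInst API):

    struct AllocaDesc {
      bool InEntryBlock;        // defined in the function's entry block
      bool ConstantArraySize;   // array size is a compile-time constant
      bool UsedWithInAlloca;    // the inalloca case called out above
    };

    // Only allocas that meet all three conditions get folded into the initial
    // stack frame; the rest are treated as dynamic allocations.
    bool isStaticAllocaLike(const AllocaDesc &AI) {
      return AI.InEntryBlock && AI.ConstantArraySize && !AI.UsedWithInAlloca;
    }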
+ if (!AI->isStaticAlloca()) + continue; + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); @@ -85,21 +90,51 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - // The object may need to be placed onto the stack near the stack - // protector if one exists. Determine here if this object is a suitable - // candidate. I.e., it would trigger the creation of a stack protector. - bool MayNeedSP = - (AI->isArrayAllocation() || - (TySize >= 8 && isa<ArrayType>(Ty) && - cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP, AI); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } + } for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Look for dynamic allocas. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + if (!AI->isStaticAlloca()) { + unsigned Align = std::max( + (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( + AI->getAllocatedType()), + AI->getAlignment()); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + // Inform the Frame Information that we have variable-sized objects. + MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI); + } + } + + // Look for inline asm that clobbers the SP register. + if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + ImmutableCallSite CS(I); + if (isa<InlineAsm>(CS.getCalledValue())) { + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + std::vector<TargetLowering::AsmOperandInfo> Ops = + TLI->ParseConstraints(CS); + for (size_t I = 0, E = Ops.size(); I != E; ++I) { + TargetLowering::AsmOperandInfo &Op = Ops[I]; + if (Op.Type == InlineAsm::isClobber) { + // Clobbers don't have SDValue operands, hence SDValue(). + TLI->ComputeConstraintToUse(Op, SDValue(), DAG); + std::pair<unsigned, const TargetRegisterClass*> PhysReg = + TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, + Op.ConstraintVT); + if (PhysReg.first == SP) + MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); + } + } + } + } + // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3a8fb85..1c596b8 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -220,10 +220,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); - // If the register class is unknown for the given definition, then try to - // infer one from the value type. - if (!RC && i < NumResults) - RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); + // Always let the value type influence the used register class. The + // constraints on the instruction may be too lax to represent the value + // type correctly. For example, a 64-bit float (X86::FR64) can't live in + // the 32-bit float super-class (X86::FR32). 
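The FR64/FR32 comment above amounts to narrowing a register class: intersect the instruction's constraint with the class implied by the value type, and keep the original constraint when the two have nothing in common. A small self-contained sketch of that narrowing, with plain string sets standing in for register classes (not LLVM's TargetRegisterClass API):

    #include <set>
    #include <string>

    using RegClass = std::set<std::string>;

    // Narrow FromInstr to its intersection with FromValueType; if the
    // intersection is empty, keep the instruction's original constraint.
    RegClass refineRegClass(const RegClass &FromInstr,
                            const RegClass &FromValueType) {
      RegClass Common;
      for (const std::string &R : FromValueType)
        if (FromInstr.count(R))
          Common.insert(R);
      return Common.empty() ? FromInstr : Common;
    }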
+ if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { + const TargetRegisterClass *VTRC = + TLI->getRegClassFor(Node->getSimpleValueType(i)); + if (RC) + VTRC = TRI->getCommonSubClass(RC, VTRC); + if (VTRC) + RC = VTRC; + } + if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -731,10 +740,16 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned NumDefs = II.getNumDefs(); const uint16_t *ScratchRegs = NULL; - // Handle PATCHPOINT specially and then use the generic code. - if (Opc == TargetOpcode::PATCHPOINT) { - unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); - NumDefs = NumResults; + // Handle STACKMAP and PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Stackmaps do not have arguments and do not preserve their calling + // convention. However, to simplify runtime support, they clobber the same + // scratch registers as AnyRegCC. + unsigned CC = CallingConv::AnyReg; + if (Opc == TargetOpcode::PATCHPOINT) { + CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9061ae9..20afb3d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -13,15 +13,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -152,10 +153,10 @@ private: public: // DAGUpdateListener implementation. - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { ForgetNode(N); } - virtual void NodeUpdated(SDNode *N) {} + void NodeUpdated(SDNode *N) override {} // Node replacement helpers void ReplacedNode(SDNode *N) { @@ -729,10 +730,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. 
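The unaligned-store handling that follows, like the load case later in this hunk, now asks the target about the access's address space as well as its type. The decision itself keeps the same shape, sketched here in plain C++ (the boolean parameter is a stand-in for the result of TLI.allowsUnalignedMemoryAccesses):

    // Expand only when the target rejects misaligned accesses for this
    // (type, address space) pair and the actual alignment is below the ABI
    // alignment computed for the memory type.
    bool needsUnalignedExpansion(bool TargetAllowsMisaligned,
                                 unsigned ActualAlign, unsigned ABIAlign) {
      if (TargetAllowsMisaligned)
        return false;                // hardware copes; emit the access as-is
      return ActualAlign < ABIAlign; // otherwise split into aligned pieces
    }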
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + unsigned AS = ST->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) @@ -740,6 +742,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -807,7 +810,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -818,7 +821,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); @@ -826,7 +829,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -840,16 +843,18 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = ST->getAddressSpace(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) @@ -889,10 +894,11 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: + case TargetLowering::Legal: { + unsigned AS = LD->getAddressSpace(); // If this is an unaligned load and the target doesn't support it, // expand it. 
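The RoundWidth/ExtraWidth hunks in this function split an odd-sized truncating store into a power-of-two part plus a remainder at an incremented address. A scalar sketch with RoundWidth = 16 and ExtraWidth = 8, widths chosen only for the example (the real code also handles the reversed, big-endian ordering):

    #include <cstdint>
    #include <cstring>

    // Store the low 16 bits at P and the top 8 bits at P + IncrementSize,
    // mirroring the two truncating stores in the hunk.
    void store24(uint8_t *P, uint32_t V) {
      uint16_t Lo = static_cast<uint16_t>(V);     // low RoundWidth bits
      uint8_t  Hi = static_cast<uint8_t>(V >> 16); // top ExtraWidth bits
      std::memcpy(P, &Lo, sizeof(Lo));
      std::memcpy(P + sizeof(Lo), &Hi, sizeof(Hi)); // IncrementSize = RoundWidth/8
    }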
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); @@ -901,6 +907,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } break; + } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(RVal, DAG); if (Res.getNode()) { @@ -1017,7 +1024,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1047,7 +1054,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1059,77 +1066,82 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH + isCustom = true; + // FALLTHROUGH case TargetLowering::Legal: { - Value = SDValue(Node, 0); - Chain = SDValue(Node, 1); - - if (isCustom) { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { - Value = Res; - Chain = Res.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); - } - } - } - break; + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support + // it, expand it. + EVT MemVT = LD->getMemoryVT(); + unsigned AS = LD->getAddressSpace(); + if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getDataLayout()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), + DAG, TLI, Value, Chain); + } + } + } + break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? 
- ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. - assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, SrcVT, - LD->getMemOperand()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Value = ValRes; - Chain = Result.getValue(1); - break; + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && + TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getMemOperand()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), + Chain, Ptr, SrcVT, + LD->getMemOperand()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, + SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; } } @@ -1383,10 +1395,39 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); SDLoc dl(Op); - // Store the value to a temporary stack slot, then LOAD the returned part. - SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + + // Before we generate a new store to a temporary stack slot, see if there is + // already one that we can use. There often is because when we scalarize + // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole + // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in + // the vector. 
If all are expanded here, we don't want one store per vector + // element. + SDValue StackPtr, Ch; + for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), + UE = Vec.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { + if (ST->isIndexed() || ST->isTruncatingStore() || + ST->getValue() != Vec) + continue; + + // Make sure that nothing else could have stored into the destination of + // this store. + if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) + continue; + + StackPtr = ST->getBasePtr(); + Ch = SDValue(ST, 0); + break; + } + } + + if (!Ch.getNode()) { + // Store the value to a temporary stack slot, then LOAD the returned part. + StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + } // Add the offset to the index. unsigned EltSize = @@ -1530,9 +1571,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // the pointer so that the loaded integer will contain the sign bit. unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1555,8 +1595,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Select between the nabs and abs value based on the sign bit of // the input. return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1776,6 +1816,98 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, false, 0); } +static bool +ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue &Res) { + unsigned NumElems = Node->getNumOperands(); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Try to group the scalars into pairs, shuffle the pairs together, then + // shuffle the pairs of pairs together, etc. until the vector has + // been built. This will work only if all of the necessary shuffle masks + // are legal. + + // We do this in two phases; first to check the legality of the shuffles, + // and next, assuming that all shuffles are legal, to create the new nodes. + for (int Phase = 0; Phase < 2; ++Phase) { + SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, + NewIntermedVals; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + + SDValue Vec; + if (Phase) + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); + IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i))); + } + + while (IntermedVals.size() > 2) { + NewIntermedVals.clear(); + for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { + // This vector and the next vector are shuffled together (simply to + // append the one to the other). 
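Stepping back to the EXTRACT_VECTOR_ELT change at the top of this hunk: rather than creating a fresh stack temporary for every extracted element, the code now reuses an existing, side-effect-free store of the same vector when it can find one. A toy sketch of that reuse pattern (the map and integer slot ids are stand-ins, not SelectionDAG objects):

    #include <map>

    // Return the stack slot to reload from: an existing store's slot if one is
    // already recorded for this vector, otherwise a newly created temporary.
    int getOrCreateSpillSlot(int VecId, std::map<int, int> &ExistingStores,
                             int &NextSlot) {
      auto It = ExistingStores.find(VecId);
      if (It != ExistingStores.end())
        return It->second;            // reuse: no extra store per element
      int Slot = NextSlot++;
      ExistingStores[VecId] = Slot;   // remember it for later extracts
      return Slot;
    }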
+ SmallVector<int, 16> ShuffleVec(NumElems, -1); + + SmallVector<int, 16> FinalIndices; + FinalIndices.reserve(IntermedVals[i].second.size() + + IntermedVals[i+1].second.size()); + + int k = 0; + for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = j; + FinalIndices.push_back(IntermedVals[i].second[j]); + } + for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = NumElems + j; + FinalIndices.push_back(IntermedVals[i+1].second[j]); + } + + SDValue Shuffle; + if (Phase) + Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, + IntermedVals[i+1].first, + ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + } + + // If we had an odd number of defined values, then append the last + // element to the array of new vectors. + if ((IntermedVals.size() & 1) != 0) + NewIntermedVals.push_back(IntermedVals.back()); + + IntermedVals.swap(NewIntermedVals); + } + + assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && + "Invalid number of intermediate vectors"); + SDValue Vec1 = IntermedVals[0].first; + SDValue Vec2; + if (IntermedVals.size() > 1) + Vec2 = IntermedVals[1].first; + else if (Phase) + Vec2 = DAG.getUNDEF(VT); + + SmallVector<int, 16> ShuffleVec(NumElems, -1); + for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[0].second[i]] = i; + for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; + + if (Phase) + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + } + + return true; +} /// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. @@ -1850,25 +1982,38 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { false, false, false, Alignment); } - if (!MoreThanTwoValues) { - SmallVector<int, 8> ShuffleVec(NumElems, -1); - for (unsigned i = 0; i < NumElems; ++i) { - SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - ShuffleVec[i] = V == Value1 ? 0 : NumElems; - } - if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { - // Get the splatted value into the low element of a vector register. - SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); - SDValue Vec2; - if (Value2.getNode()) - Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); - else - Vec2 = DAG.getUNDEF(VT); + SmallSet<SDValue, 16> DefinedValues; + for (unsigned i = 0; i < NumElems; ++i) { + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + DefinedValues.insert(Node->getOperand(i)); + } - // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. 
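ExpandBVWithShuffles above runs its pairwise merge tree twice: a first pass that only proves every intermediate shuffle would be legal, and a second pass that actually builds the nodes. The legality pass on its own looks roughly like the sketch below, with element counts standing in for shuffle masks and an arbitrary size limit standing in for TLI.isShuffleMaskLegal:

    #include <cstddef>
    #include <vector>

    // Simulate the pairwise merge tree using only element counts; bail out the
    // moment any merged width would exceed what the "target" accepts.
    bool wouldMergeLegally(std::vector<std::size_t> Work, std::size_t Limit) {
      while (Work.size() > 1) {
        std::vector<std::size_t> Next;
        for (std::size_t i = 0; i + 1 < Work.size(); i += 2) {
          std::size_t Merged = Work[i] + Work[i + 1];
          if (Merged > Limit)
            return false;             // an intermediate shuffle would be illegal
          Next.push_back(Merged);
        }
        if (Work.size() & 1)
          Next.push_back(Work.back()); // odd leftover carried to the next round
        Work.swap(Next);
      }
      return true;                     // second pass may now build for real
    }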
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } else { + SDValue Res; + if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) + return Res; } } @@ -2868,6 +3013,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(1), Zero, Zero, cast<AtomicSDNode>(Node)->getMemOperand(), cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); @@ -3099,7 +3245,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. - // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept + // it. if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. @@ -3107,8 +3254,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, - VT.getSizeInBits()/NewEltVT.getSizeInBits()); + EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT @@ -3116,7 +3264,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask - unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + unsigned int factor = + NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); @@ -3782,8 +3931,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); @@ -3813,8 +3962,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, + Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; @@ -3976,7 +4125,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } case ISD::SELECT: { unsigned ExtOp, TruncOp; - if (Node->getValueType(0).isVector()) { + if (Node->getValueType(0).isVector() || + Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { ExtOp = ISD::BITCAST; TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4255948..18b2376 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -193,10 +193,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) 
{ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), - N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getMemOperand(), N->getOrdering(), - N->getSynchScope()); + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), + N->getChain(), N->getBasePtr(), Op2, Op3, + N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -2448,6 +2448,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), cast<AtomicSDNode>(N)->getSynchScope()); ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); @@ -2577,13 +2578,17 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), - LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSLo.getValueType()) && + TLI.isTypeLegal(RHSLo.getValueType())) + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (TLI.isTypeLegal(LHSHi.getValueType()) && + TLI.isTypeLegal(RHSHi.getValueType())) + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()), diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index eb13230..e141883 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -634,7 +634,7 @@ namespace { : SelectionDAG::DAGUpdateListener(dtl.getDAG()), DTL(dtl), NodesToAnalyze(nta) {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && N->getNodeId() != DAGTypeLegalizer::Processed && "Invalid node ID for RAUW deletion!"); @@ -655,7 +655,7 @@ namespace { NodesToAnalyze.insert(E); } - virtual void NodeUpdated(SDNode *N) { + void NodeUpdated(SDNode *N) override { // Node updates can mean pretty much anything. It is possible that an // operand was set to something already processed (f.e.) in which case // this node could become ready. Recompute its flags. 
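The guarded SimplifySetCC calls above feed the usual expansion of a wide-integer compare into compares of its two halves. As a scalar illustration of what that expansion computes (not the DAG node sequence itself), a 128-bit value held as hi/lo 64-bit halves compares like this:

    #include <cstdint>

    // Unsigned less-than: the high halves decide unless they are equal, in
    // which case the low halves break the tie.
    bool ult128(uint64_t LHSHi, uint64_t LHSLo, uint64_t RHSHi, uint64_t RHSLo) {
      if (LHSHi != RHSHi)
        return LHSHi < RHSHi;
      return LHSLo < RHSLo;
    }

    // Equality: both halves must match.
    bool eq128(uint64_t LHSHi, uint64_t LHSLo, uint64_t RHSHi, uint64_t RHSLo) {
      return LHSHi == RHSHi && LHSLo == RHSLo;
    }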
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 13bb08f..947ea10 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -541,6 +541,7 @@ private: SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -670,13 +671,13 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of - /// stores to store a widen vector into non widen memory + /// stores to store a widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of - /// stores to store a truncate widen vector into non widen memory + /// stores to store a truncate widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c749fde..e9424f2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -78,8 +78,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); - llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -518,7 +518,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); else - llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -540,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2c3cdcc..551d054 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -77,6 +77,10 @@ class VectorLegalizer { // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input // operand to the next size up. SDValue PromoteVectorOpINT_TO_FP(SDValue Op); + // Implements FP_TO_[SU]INT vector promotion of the result type; it is + // promoted to the next size up integer type. The result is then truncated + // back to the original type. 
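A scalar analogue of the promote-then-truncate strategy that comment describes, for FP_TO_[SU]INT (the declaration and implementation follow below; the analogue holds for values that fit the narrow type, with the wider integer playing the role of the first legal vector element width):

    #include <cstdint>

    // Convert through a wider integer type the "target" supports, then
    // truncate back to the width that was originally requested.
    uint8_t fptoui8ViaI32(float F) {
      uint32_t Wide = static_cast<uint32_t>(F); // legal wider FP_TO_UINT
      return static_cast<uint8_t>(Wide);        // explicit truncate back
    }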
+ SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned); public: bool Run(); @@ -88,7 +92,7 @@ bool VectorLegalizer::Run() { // Before we start legalizing vector nodes, check if there are any vectors. bool HasVectors = false; for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { // Check if the values of the nodes contain vectors. We don't need to check // the operands because we are going to check their values at some point. for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); @@ -112,7 +116,7 @@ bool VectorLegalizer::Run() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -210,6 +214,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -273,6 +278,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Result = PromoteVectorOpINT_TO_FP(Op); Changed = true; break; + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + Changed = true; + break; } break; case TargetLowering::Legal: break; @@ -351,14 +362,9 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // // Increase the bitwidth of the element to the next pow-of-two // (which is greater than 8 bits). - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); - assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); - - // Build a new vector type and check if it is legal. - MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -375,6 +381,35 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { Operands.size()); } +// For FP_TO_INT we promote the result type to a vector type with wider +// elements and then truncate the result. This is different from the default +// PromoteVector which uses bitcast to promote thus assumning that the +// promoted vector type has the same overall size. 
+SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) { + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + EVT VT = Op.getValueType(); + + EVT NewVT; + unsigned NewOpc; + while (1) { + NewVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { + NewOpc = ISD::FP_TO_SINT; + break; + } + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { + NewOpc = ISD::FP_TO_UINT; + break; + } + } + + SDLoc loc(Op); + SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); +} + SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDLoc dl(Op); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f7a3e3d..940a9c9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -65,6 +65,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -384,6 +385,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; } } @@ -467,6 +471,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getOriginalAlignment(), N->getTBAAInfo()); } +/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} //===----------------------------------------------------------------------===// // Result Vector Splitting @@ -521,6 +534,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; + case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -624,7 +638,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SDLoc dl(N); SDValue InOp = N->getOperand(0); @@ -679,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -700,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -716,7 +730,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); @@ -740,7 +754,7 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, @@ -804,7 +818,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -814,7 +828,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(LD); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -828,7 +842,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, @@ -859,12 +873,12 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; SDLoc DL(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. 
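The llvm::tie, prior() and llvm::next() replacements running through this file (and the scheduler files later in the commit) are mechanical: std::tie, std::prev and std::next from C++11 provide the same behaviour as the old LLVM helpers. For reference:

    #include <iterator>
    #include <tuple>
    #include <utility>
    #include <vector>

    std::pair<int, int> splitValue(int N) { return {N / 2, N - N / 2}; }

    void demo(const std::vector<int> &Q) {
      int Lo, Hi;
      std::tie(Lo, Hi) = splitValue(9);       // unpack a pair into existing locals
      if (!Q.empty()) {
        auto Last = std::prev(Q.end());       // iterator one before end()
        auto Second = std::next(Q.begin());   // iterator one past begin()
        (void)Last;
        (void)Second;
      }
    }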
SDValue LL, LH, RL, RH; - llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -875,7 +889,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Get the dest types - they may not match the input types, e.g. int_to_fp. EVT LoVT, HiVT; SDLoc dl(N); - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. @@ -883,7 +897,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); else - llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -912,7 +926,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SrcVT = N->getOperand(0).getValueType(); EVT DestVT = N->getValueType(0); EVT LoVT, HiVT; - llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); // We can do better than a generic split operation if the extend is doing // more than just doubling the width of the elements and the following are @@ -938,7 +952,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, EVT SplitSrcVT = EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); EVT SplitLoVT, SplitHiVT; - llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { DEBUG(dbgs() << "Split vector extend via incremental extend:"; @@ -947,7 +961,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); // Get the low and high halves of the new, extended one step, vector. - llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); // Extend those vector halves the rest of the way. 
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1160,13 +1174,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { "Lo and Hi have differing types"); EVT LoOpVT, HiOpVT; - llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; - llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); - llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); - llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1281,7 +1295,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; @@ -1370,7 +1384,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // Extract the halves of the input via extract_subvector. SDValue InLoVec, InHiVec; - llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -2180,6 +2194,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); + // If we have to split the condition there is no point in widening the + // select. This would result in an cycle of widening the select -> + // widening the condition operand -> splitting the condition operand -> + // splitting the select -> widening the select. Instead split this select + // further and widen the resulting type. 
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { + SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); + SDValue Res = ModifyToType(SplitSelect, WidenVT); + return Res; + } + if (Cond1.getValueType() != CondWidenVT) Cond1 = ModifyToType(Cond1, CondWidenVT); } @@ -2251,7 +2276,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); - assert(InVT.isVector() && "can not widen non vector type"); + assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1dd2128..3b3424d 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -603,7 +603,7 @@ SUnit *ResourcePriorityQueue::pop() { std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { signed BestCost = SUSchedulingCost(*Best); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { if (SUSchedulingCost(*I) > BestCost) { @@ -614,14 +614,14 @@ SUnit *ResourcePriorityQueue::pop() { } // Use default TD scheduling mechanism. else { - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; } SUnit *V = *Best; - if (Best != prior(Queue.end())) + if (Best != std::prev(Queue.end())) std::swap(*Best, Queue.back()); Queue.pop_back(); @@ -633,7 +633,7 @@ SUnit *ResourcePriorityQueue::pop() { void ResourcePriorityQueue::remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 4af7172..b62bd62 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/DebugLoc.h" namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 6c5e0ab..0687392 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -80,7 +80,7 @@ public: ScheduleDAGFast(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; /// AddPred - adds a predecessor edge to SUnit SU. /// This returns true if this is a new predecessor. @@ -107,7 +107,7 @@ private: void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. 
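ResourcePriorityQueue::pop() and remove() above drop an element from the middle of a plain std::vector by swapping it with the back and popping, which keeps removal O(1) when ordering does not matter. The same idiom, extracted on its own:

    #include <algorithm>
    #include <iterator>
    #include <vector>

    // Remove *I from Q without preserving order: move the last element into
    // its slot, then shrink the vector by one.
    template <typename T>
    void swapRemove(std::vector<T> &Q, typename std::vector<T>::iterator I) {
      if (I != std::prev(Q.end()))
        std::swap(*I, Q.back());
      Q.pop_back();
    }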
- bool forceUnitLatencies() const { return true; } + bool forceUnitLatencies() const override { return true; } }; } // end anonymous namespace @@ -646,9 +646,10 @@ class ScheduleDAGLinearize : public ScheduleDAGSDNodes { public: ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} - void Schedule(); + void Schedule() override; - MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + MachineBasicBlock * + EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; private: std::vector<SDNode*> Sequence; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1a562d7..c283664 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -177,7 +177,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } @@ -261,7 +261,7 @@ private: /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. - bool forceUnitLatencies() const { + bool forceUnitLatencies() const override { return !NeedLatency; } }; @@ -1539,7 +1539,6 @@ template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; reverse_sort(SF &sf) : SortFunc(sf) {} - reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} bool operator()(SUnit* left, SUnit* right) const { // reverse left/right rather than simply !SortFunc(left, right) @@ -1559,7 +1558,6 @@ struct bu_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1574,8 +1572,6 @@ struct src_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1590,8 +1586,6 @@ struct hybrid_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1609,8 +1603,6 @@ struct ilp_ls_rr_sort : public queue_sort { RegReductionPQBase *SPQ; ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1675,13 +1667,13 @@ public: return scheduleDAG->getHazardRec(); } - void initNodes(std::vector<SUnit> &sunits); + void initNodes(std::vector<SUnit> &sunits) override; - void addNode(const SUnit *SU); + void addNode(const SUnit *SU) override; - void updateNode(const SUnit *SU); + void updateNode(const SUnit *SU) override; - void releaseState() { + void releaseState() override { SUnits = 0; SethiUllmanNumbers.clear(); std::fill(RegPressure.begin(), RegPressure.end(), 0); @@ -1695,26 +1687,26 @@ public: return SU->getNode()->getIROrder(); } - bool empty() const { return Queue.empty(); } + bool empty() const override { return Queue.empty(); } - void push(SUnit *U) { + void push(SUnit *U) override { assert(!U->NodeQueueId && "Node in the queue already"); U->NodeQueueId = ++CurQueueId; Queue.push_back(U); } - void remove(SUnit *SU) { + void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && 
"Not in queue!"); std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); - if (I != prior(Queue.end())) + if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); SU->NodeQueueId = 0; } - bool tracksRegPressure() const { return TracksRegPressure; } + bool tracksRegPressure() const override { return TracksRegPressure; } void dumpRegPressure() const; @@ -1724,9 +1716,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void scheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU) override; - void unscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU) override; protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1738,12 +1730,12 @@ protected: template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; - if (Best != prior(Q.end())) + if (Best != std::prev(Q.end())) std::swap(*Best, Q.back()); Q.pop_back(); return V; @@ -1776,13 +1768,13 @@ public: tii, tri, tli), Picker(this) {} - bool isBottomUp() const { return SF::IsBottomUp; } + bool isBottomUp() const override { return SF::IsBottomUp; } - bool isReady(SUnit *U) const { + bool isReady(SUnit *U) const override { return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); } - SUnit *pop() { + SUnit *pop() override { if (Queue.empty()) return NULL; SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 054e3dd..5639894 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -738,13 +738,13 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || // Fast-isel may have inserted some instructions, in which case the // BB->back().isPHI() test will not fire when we want it to. - prior(Emitter.getInsertPos())->isPHI()) { + std::prev(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. 
Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; } - Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2ff37e0..5e11dbb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -117,13 +117,13 @@ namespace llvm { virtual MachineBasicBlock* EmitSchedule(MachineBasicBlock::iterator &InsertPos); - virtual void dumpNode(const SUnit *SU) const; + void dumpNode(const SUnit *SU) const override; void dumpSchedule() const; - virtual std::string getGraphNodeLabel(const SUnit *SU) const; + std::string getGraphNodeLabel(const SUnit *SU) const override; - virtual std::string getDAGName() const; + std::string getDAGName() const override; virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 58aa1fe..fb86103 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -80,7 +80,7 @@ public: delete AvailableQueue; } - void Schedule(); + void Schedule() override; private: void releaseSucc(SUnit *SU, const SDep &D); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45d5a4f..d11ce80 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -18,17 +18,15 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -179,6 +177,22 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantSDNode or undef. +bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. @@ -217,6 +231,21 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { + switch (ExtType) { + case ISD::EXTLOAD: + return ISD::ANY_EXTEND; + case ISD::SEXTLOAD: + return ISD::SIGN_EXTEND; + case ISD::ZEXTLOAD: + return ISD::ZERO_EXTEND; + default: + break; + } + + llvm_unreachable("Invalid LoadExtType"); +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). 
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -369,9 +398,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info. case ISD::TargetConstant: - case ISD::Constant: - ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue()); + case ISD::Constant: { + const ConstantSDNode *C = cast<ConstantSDNode>(N); + ID.AddPointer(C->getConstantIntValue()); + ID.AddBoolean(C->isOpaque()); break; + } case ISD::TargetConstantFP: case ISD::ConstantFP: { ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); @@ -869,7 +901,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(0), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), UpdateListeners(0) { @@ -877,10 +909,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, - const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { MF = &mf; - TTI = tti; TLI = tli; Context = &mf.getFunction()->getContext(); } @@ -956,19 +986,21 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); - return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { - return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +{ + return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, + bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1010,7 +1042,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) .trunc(ViaEltSizeInBits), - ViaEltVT, isT)); + ViaEltVT, isT, isO)); } // EltParts is currently in little endian order. 
If we actually want @@ -1041,6 +1073,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); ID.AddPointer(Elt); + ID.AddBoolean(isO); void *IP = 0; SDNode *N = NULL; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) @@ -1048,7 +1081,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -1139,7 +1172,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (!GVar) { // If GV is an alias then use the aliasee for determining thread-localness. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal()); } unsigned Opc; @@ -2502,17 +2535,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue Operand) { - // Constant fold unary operations with an integer constant operand. + // Constant fold unary operations with an integer constant operand. Even + // opaque constant will be folded, because the folding of unary operations + // doesn't create new constants with different values. Nevertheless, the + // opaque flag is preserved during folding to prevent future folding with + // other constants. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { const APInt &Val = C->getAPIntValue(); switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, + C->isTargetOpcode(), C->isOpaque()); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { APFloat apf(EVTToAPFloatSemantics(VT), @@ -2529,15 +2568,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: - return getConstant(Val.byteSwap(), VT); + return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTPOP: - return getConstant(Val.countPopulation(), VT); + return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countLeadingZeros(), VT); + return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countTrailingZeros(), VT); + return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), + C->isOpaque()); } } @@ -2774,10 +2817,13 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); - if (Scalar1 && Scalar2) { + if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) + return SDValue(); + + if (Scalar1 && Scalar2) // Scalar instruction. 
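// Standalone sketch of the "opaque constant" folding policy introduced in
// the hunks above (plain structs, not the real ConstantSDNode/SelectionDAG
// interfaces): unary folds are still performed but must carry the opaque
// flag forward, while binary constant folding bails out as soon as either
// operand is opaque so the constant reaches the final program unmodified.
#include <cstdint>

struct IntConst {
  uint64_t Val;
  bool Opaque;
};

// Unary fold (bitwise NOT as the example op): allowed, flag preserved.
IntConst foldNot(IntConst C) { return {~C.Val, C.Opaque}; }

// Binary fold: refuse to combine when either operand is opaque.
bool foldAdd(IntConst A, IntConst B, IntConst &Out) {
  if (A.Opaque || B.Opaque)
    return false;
  Out = IntConst{A.Val + B.Val, false};
  return true;
}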
Inputs.push_back(std::make_pair(Scalar1, Scalar2)); - } else { + else { // For vectors extract each constant element into Inputs so we can constant // fold them individually. BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); @@ -2793,6 +2839,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, if (!V1 || !V2) // Not a constant, bail. return SDValue(); + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); + // Avoid BUILD_VECTOR nodes that perform implicit truncation. // FIXME: This is valid and could be handled by truncating the APInts. if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) @@ -3546,10 +3595,10 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - // If the "cost" of materializing the integer immediate is 1 or free, then - // it is cost effective to turn the load into the immediate. - const TargetTransformInfo *TTI = DAG.getTargetTransformInfo(); - if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2) + // If the "cost" of materializing the integer immediate is less than the cost + // of a load, then it is cost effective to turn the load into the immediate. + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) return DAG.getConstant(Val, VT); return SDValue(0, 0); } @@ -3609,8 +3658,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || - TLI.allowsUnalignedMemoryAccesses(VT)) { + unsigned AS = 0; + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || + TLI.allowsUnalignedMemoryAccesses(VT, AS)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3667,9 +3717,10 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // FIXME: Only does this for 64-bit or more since we don't have proper // cost model for unaligned load / store. 
bool Fast; + unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -4182,9 +4233,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTList, SDValue* Ops, unsigned NumOps, + SDVTList VTList, SDValue *Ops, unsigned NumOps, MachineMemOperand *MMO, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); @@ -4206,17 +4258,28 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, MemVT, Ops, DynOps, NumOps, MMO, - Ordering, SynchScope); + SuccessOrdering, FailureOrdering, + SynchScope); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue *Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -4237,14 +4300,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, - Ordering, SynchScope); + SuccessOrdering, FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, - AtomicOrdering Ordering, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -4253,7 +4317,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering, + FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -5633,7 +5698,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { SDNode::use_iterator &UI; SDNode::use_iterator &UE; - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Increment the iterator as needed. 
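// Sketch of the overload pattern used above when ATOMIC_CMP_SWAP grew
// separate success and failure orderings (illustrative names, not the
// SelectionDAG::getAtomic signatures): the old single-ordering entry point
// stays as a thin wrapper that forwards its one ordering for both outcomes,
// so existing callers keep compiling unchanged.
enum class Ordering {
  Monotonic, Acquire, Release, AcquireRelease, SequentiallyConsistent
};

struct CmpXchgOrderings {
  Ordering Success;
  Ordering Failure;
};

CmpXchgOrderings makeCmpXchg(Ordering Success, Ordering Failure) {
  return {Success, Failure};
}

// Compatibility overload: one ordering is used for both outcomes.
CmpXchgOrderings makeCmpXchg(Ordering Order) {
  return makeCmpXchg(Order, Order);
}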
while (UI != UE && N == *UI) ++UI; @@ -6457,7 +6522,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) { + bool isBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -6518,6 +6583,27 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } +ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const { + SDValue Op0 = getOperand(0); + if (Op0.getOpcode() != ISD::Constant) + return nullptr; + + for (unsigned i = 1, e = getNumOperands(); i != e; ++i) + if (getOperand(i) != Op0) + return nullptr; + + return cast<ConstantSDNode>(Op0); +} + +bool BuildVectorSDNode::isConstant() const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + unsigned Opc = getOperand(i).getOpcode(); + if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) + return false; + } + return true; +} + bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2b2713d..4a6e5cf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -34,10 +34,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -214,6 +214,20 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, llvm_unreachable("Unknown mismatch!"); } +static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, + const Twine &ErrMsg) { + const Instruction *I = dyn_cast_or_null<Instruction>(V); + if (!V) + return Ctx.emitError(ErrMsg); + + const char *AsmError = ", possible invalid constraint for vector type"; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + return Ctx.emitError(I, ErrMsg + AsmError); + + return Ctx.emitError(I, ErrMsg); +} + /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. 
If the parts combine to a /// type larger then ValueVT then AssertOp can be used to specify whether the @@ -306,16 +320,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("non-trivial scalar-to-vector conversion"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "non-trivial scalar-to-vector conversion"); return DAG.getUNDEF(ValueVT); } @@ -397,18 +403,9 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) { - LLVMContext &Ctx = *DAG.getContext(); - Twine ErrMsg("scalar-to-vector conversion failed"); - if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { - if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) - ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; - Ctx.emitError(I, ErrMsg); - } else { - Ctx.emitError(ErrMsg); - } - } + if (PartEVT != ValueVT) + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "scalar-to-vector conversion failed"); Parts[0] = Val; return; @@ -627,16 +624,6 @@ namespace { } } - /// areValueTypesLegal - Return true if types of all the values are legal. - bool areValueTypesLegal(const TargetLowering &TLI) { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT RegisterVT = RegVTs[Value]; - if (!TLI.isTypeLegal(RegisterVT)) - return false; - } - return true; - } - /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); @@ -851,12 +838,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + unsigned TheReg = Regs[Reg++]; + Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); + + if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { + // If we clobbered the stack pointer, MFI should know about it. 
+ assert(DAG.getMachineFunction().getFrameInfo()-> + hasInlineAsmWithSPAdjust()); + } } } } @@ -866,7 +861,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - TD = DAG.getTarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -884,6 +879,7 @@ void SelectionDAGBuilder::clear() { PendingExports.clear(); CurInst = NULL; HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -1384,7 +1380,9 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock *SwitchBB, + uint32_t TWeight, + uint32_t FWeight) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1409,7 +1407,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB); + BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); return; } @@ -1417,17 +1415,26 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB); + NULL, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); } +/// Scale down both weights to fit into uint32_t. +static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc) { + unsigned Opc, uint32_t TWeight, + uint32_t FWeight) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || @@ -1435,7 +1442,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, + TWeight, FWeight); return; } @@ -1447,6 +1455,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, if (Opc == Instruction::Or) { // Codegen X | Y as: + // BB1: // jmp_if_X TBB // jmp TmpBB // TmpBB: @@ -1454,14 +1463,34 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // jmp FBB // + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for orignal BB. + // Assuming the orignal weights are A and B, one choice is to set BB1's + // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice + // assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + uint64_t NewTrueWeight = TWeight; + uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = TWeight; + NewFalseWeight = 2 * (uint64_t)FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: + // BB1: // jmp_if_X TmpBB // jmp FBB // TmpBB: @@ -1470,11 +1499,28 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // // This requires creation of TmpBB after CurBB. + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for orignal BB. + // Assuming the orignal weights are A and B, one choice is to set BB1's + // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice + // assumes that + // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + + uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; + uint64_t NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); + NewTrueWeight = 2 * (uint64_t)TWeight; + NewFalseWeight = FWeight; + ScaleWeights(NewTrueWeight, NewFalseWeight); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + NewTrueWeight, NewFalseWeight); } } @@ -1525,8 +1571,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); - // If this is not a fall-through branch, emit the branch. - if (Succ0MBB != NextBlock) + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1561,7 +1608,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode()); + BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), + getEdgeWeight(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. 
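// Worked sketch of the weight bookkeeping added above for "codegen X | Y"
// (the helper mirrors ScaleWeights; the example numbers are made up): with
// original edge weights A = TWeight and B = FWeight, BB1 is given (A, A + 2B)
// and TmpBB is given (A, 2B), and each pair is rescaled so the 64-bit
// intermediate sums still fit in uint32_t.
#include <cstdint>

static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  uint64_t NewMax = NewTrue > NewFalse ? NewTrue : NewFalse;
  uint32_t Scale = static_cast<uint32_t>(NewMax / UINT32_MAX) + 1;
  NewTrue /= Scale;
  NewFalse /= Scale;
}

// Example with A = 3 (branch taken) and B = 5 (fall through):
//   BB1:   true = 3, false = 3 + 2*5 = 13
//   TmpBB: true = 3, false = 2*5     = 10
// Both pairs are far below UINT32_MAX, so scaleWeights leaves them unchanged;
// huge profile counts would instead be divided by a common Scale factor.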
@@ -2351,7 +2399,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, volatile double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); + volatile double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -2590,7 +2638,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); @@ -2936,6 +2984,13 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { if (DestVT != N.getValueType()) setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. + // Check if the original LLVM IR Operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only regcognize a bitcast of a genuine + // constant integer as an opaque constant. + else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, + /*isOpaque*/true)); else setValue(&I, N); // noop cast. } @@ -3261,7 +3316,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3275,7 +3330,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = - TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); @@ -3292,7 +3347,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - TD->getTypeAllocSize(Ty)); + DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3370,9 +3425,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - // Inform the Frame Information that we have just allocated a variable-sized - // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); + assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3400,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile() || NumValues > MaxParallelChains) + if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. 
Root = getRoot(); else if (AA->pointsToConstantMemory( @@ -3413,6 +3466,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } + const TargetLowering *TLI = TM.getTargetLowering(); + if (isVolatile) + Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), NumValues)); @@ -3536,14 +3593,15 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); - AtomicOrdering Order = I.getOrdering(); + AtomicOrdering SuccessOrder = I.getSuccessOrdering(); + AtomicOrdering FailureOrder = I.getFailureOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); SDValue L = @@ -3554,13 +3612,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); SDValue OutChain = L.getValue(1); if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, DAG, *TLI); setValue(&I, L); @@ -3637,6 +3696,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), @@ -5283,7 +5343,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5324,6 +5384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { (void)getControlRoot(); return 0; } + case Intrinsic::clear_cache: + return TLI->getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return 0; @@ -5366,6 +5428,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, int DemoteStackIdx = -100; if (!CanLowerReturn) { + assert(!CS.hasInAllocaArgument() && + "sret demotion is incompatible with inalloca"); uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( @@ -5508,9 +5572,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. 
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + for (const User *U : V->users()) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) if (IC->isEquality()) if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) @@ -5534,7 +5597,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.TD)) + Builder.DL)) return Builder.getValue(LoadCst); } @@ -5653,9 +5716,13 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // bloat the code. const TargetLowering *TLI = TM.getTargetLowering(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -6026,7 +6093,7 @@ public: /// MVT::Other. EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout *TD) const { + const DataLayout *DL) const { if (CallOperandVal == 0) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) @@ -6052,7 +6119,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = DL->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -6108,7 +6175,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // types are identical size, use a bitcast to convert (e.g. two differing // vector types). MVT RegVT = *PhysReg.second->vt_begin(); - if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -6241,7 +6308,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). getSimpleVT(); } @@ -6716,11 +6783,11 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &TD = *TLI->getDataLayout(); + const DataLayout &DL = *TLI->getDataLayout(); SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); + DL.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -6781,6 +6848,42 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, return TLI->LowerCallTo(CLI); } +/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// or patchpoint target node's operand list. 
+/// +/// Constants are converted to TargetConstants purely as an optimization to +/// avoid constant materialization and register allocation. +/// +/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not +/// generate addess computation nodes, and so ExpandISelPseudo can convert the +/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids +/// address materialization and register allocation, but may also be required +/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an +/// alloca in the entry block, then the runtime may assume that the alloca's +/// StackMap location can be read immediately after compilation and that the +/// location is valid at any point during execution (this is similar to the +/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were +/// only available in a register, then the runtime would need to trap when +/// execution reaches the StackMap in order to read the alloca's location. +static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else + Ops.push_back(OpVal); + } +} + /// \brief Lower llvm.experimental.stackmap directly to its target opcode. void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, @@ -6788,61 +6891,64 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Callee = getValue(CI.getCalledValue()); + SDValue Chain, InFlag, Callee, NullPtr; + SmallVector<SDValue, 32> Ops; - // Lower into a call sequence with no args and no return value. - std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee); - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + SDLoc DL = getCurSDLoc(); + Callee = getValue(CI.getCalledValue()); + NullPtr = DAG.getIntPtrConstant(0, true); - /// Get a call instruction from the call sequence chain. - /// Tail calls are not allowed. - SDNode *CallEnd = Chain.getNode(); - assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && - "Expected a callseq node."); - SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + // The stackmap intrinsic only records the live variables (the arguemnts + // passed to it) and emits NOPS (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target specific lowering code. + // Instead we perform the call lowering right here. 
+ // + // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) + // chain, flag = CALLSEQ_END(chain, 0, 0, flag) + // + Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + InFlag = Chain.getValue(1); - // Replace the target specific call node with the stackmap intrinsic. - SmallVector<SDValue, 8> Ops; + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); - // Add the <id> and <numShadowBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } // Push live variables for the stack map. - for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + addStackMapLiveVars(CI, 2, Ops, *this); - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). - Ops.push_back(*(Call->op_begin())); + // We are not pushing any register mask info here on the operands list, + // because the stackmap doesn't clobber anything. - // Push the glue flag (last operand). - if (hasGlue) - Ops.push_back(*(Call->op_end()-1)); + // Push the chain and the glue flag. + Ops.push_back(Chain); + Ops.push_back(InFlag); + // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); + Chain = SDValue(SM, 0); + InFlag = Chain.getValue(1); - // Replace the target specific call node with a STACKMAP node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), - NodeTys, Ops); + Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); - // StackMap generates no value, so nothing goes in the NodeMap. + // Stackmaps don't generate values, so nothing goes into the NodeMap. - // Fixup the consumers of the intrinsic. The chain and glue may be used in the - // call sequence. - DAG.ReplaceAllUsesWith(Call, MN); + // Set the root to the target-lowered call chain. + DAG.setRoot(Chain); - DAG.DeleteNode(Call); + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>, + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, // i32 <numArgs>, @@ -6855,17 +6961,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { SDValue Callee = getValue(CI.getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - unsigned NumArgs = - cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue(); + SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> - assert(CI.getNumArgOperands() >= NumArgs + 4 && + // Intrinsics include all meta-operands up to but not including CC. 
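// Standalone sketch of the live-operand encoding that addStackMapLiveVars
// performs above (illustrative containers, not SDValue/SelectionDAG types):
// constants are prefixed with a ConstantOp marker and recorded as immediates,
// frame indices turn into direct stack references, and everything else is
// passed through untouched for the register allocator to place.
#include <cstdint>
#include <vector>

struct LiveValue {
  enum Kind { Constant, FrameIndex, Other } K;
  int64_t Payload; // immediate value, frame slot number, or register id
};

struct Operand {
  enum Tag { ConstantOp, Immediate, DirectMem, PassThrough } T;
  int64_t Value;
};

void encodeLiveVars(const std::vector<LiveValue> &Live,
                    std::vector<Operand> &Ops) {
  for (const LiveValue &V : Live) {
    if (V.K == LiveValue::Constant) {
      Ops.push_back({Operand::ConstantOp, 0}); // marker for the emitter
      Ops.push_back({Operand::Immediate, V.Payload});
    } else if (V.K == LiveValue::FrameIndex) {
      Ops.push_back({Operand::DirectMem, V.Payload}); // alloca stack slot
    } else {
      Ops.push_back({Operand::PassThrough, V.Payload});
    }
  }
}
// e.g. live values {42, frame slot #2, register %x} are encoded as
// [ConstantOp, 42, DirectMem(#2), PassThrough(%x)].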
+ unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); // Set the root to the target-lowered call chain. SDValue Chain = Result.second; @@ -6885,13 +6993,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; - // Add the <id> and <numNopBytes> constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32)); - } + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); + // Assume that the Callee is a constant address. + // FIXME: handle function symbols in the future. Ops.push_back( DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), /*isTarget=*/true)); @@ -6909,25 +7020,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (isAnyRegCC) - for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CI.getArgOperand(i))); - // Push the arguments from the call instruction. + // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = getValue(CI.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); - } else - Ops.push_back(OpVal); - } + addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. if (hasGlue) @@ -6981,6 +7083,9 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -7037,8 +7142,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); - if (Args[i].isByVal) { + if (Args[i].isByVal) Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. 
If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); @@ -7202,12 +7317,10 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); - for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) { - const User *U = *UI; + for (const User *U : A->users()) if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) return false; // Use not in entry block. - } + return true; } @@ -7215,7 +7328,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); const TargetLowering *TLI = getTargetLowering(); - const DataLayout *TD = TLI->getDataLayout(); + const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { @@ -7247,7 +7360,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = - TD->getABITypeAlignment(ArgTy); + DL->getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7257,11 +7370,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 835f643..66835bf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,9 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> @@ -57,6 +56,7 @@ class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; class MDNode; +class MVT; class PHINode; class PtrToIntInst; class ReturnInst; @@ -488,8 +488,12 @@ private: private: const TargetMachine &TM; public: + /// Lowest valid SDNodeOrder. 
The special case 0 is reserved for scheduling + /// nodes without a corresponding SDNode. + static const unsigned LowestSDNodeOrder = 1; + SelectionDAG &DAG; - const DataLayout *TD; + const DataLayout *DL; AliasAnalysis *AA; const TargetLibraryInfo *LibInfo; @@ -534,7 +538,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), + : CurInst(NULL), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -608,11 +612,13 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc); + MachineBasicBlock *SwitchBB, unsigned Opc, + uint32_t TW, uint32_t FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB); + MachineBasicBlock *SwitchBB, + uint32_t TW, uint32_t FW); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); @@ -627,7 +633,7 @@ public: bool useVoidTy = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the - /// references that ned to refer to the last resulting block. + /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); private: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c04a08d..535feba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -14,11 +14,10 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" @@ -82,7 +81,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; case ISD::RegisterMask: return "RegisterMask"; - case ISD::Constant: return "Constant"; + case ISD::Constant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueConstant"; + return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; @@ -112,7 +114,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { } case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueTargetConstant"; + return "TargetConstant"; case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; @@ -352,7 +357,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; - if (llvm::next(i) != e) + if 
(std::next(i) != e) OS << " "; } OS << ">"; @@ -385,7 +390,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<GlobalAddressSDNode>(this)) { int64_t offset = GADN->getOffset(); OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); + GADN->getGlobal()->printAsOperand(OS); OS << '>'; if (offset > 0) OS << " + " << offset; @@ -476,9 +481,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { dyn_cast<BlockAddressSDNode>(this)) { int64_t offset = BA->getOffset(); OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); OS << ">"; if (offset > 0) OS << " + " << offset; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3a0cfa1..5d0e2b9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -33,8 +32,8 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -213,7 +212,7 @@ MachinePassRegistry RegisterScheduler::Registry; static cl::opt<RegisterScheduler::FunctionPassCtor, false, RegisterPassParser<RegisterScheduler> > ISHeuristic("pre-RA-sched", - cl::init(&createDefaultScheduler), + cl::init(&createDefaultScheduler), cl::Hidden, cl::desc("Instruction schedulers available (before register" " allocation):")); @@ -400,7 +399,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); LibInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = getAnalysisIfAvailable<TargetTransformInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; TargetSubtargetInfo &ST = @@ -418,8 +416,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TTI, TLI); - FuncInfo->set(Fn, *MF); + CurDAG->init(*MF, TLI); + FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); @@ -428,7 +426,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); - MF->setHasMSInlineAsm(false); + MF->setHasInlineAsm(false); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be @@ -448,7 +447,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); - unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); + unsigned Reg = + hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -456,7 +456,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (Def) { MachineBasicBlock::iterator InsertPos = Def; // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + Def->getParent()->insert(std::next(InsertPos), MI); } else DEBUG(dbgs() << "Dropping debug info for dead vreg" << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); @@ -483,9 +483,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. MachineInstr *CopyUseMI = NULL; - for (MachineRegisterInfo::use_iterator - UI = RegInfo->use_begin(LDI->second); - MachineInstr *UseMI = UI.skipInstruction();) { + for (MachineRegisterInfo::use_instr_iterator + UI = RegInfo->use_instr_begin(LDI->second), + E = RegInfo->use_instr_end(); UI != E; ) { + MachineInstr *UseMI = &*(UI++); if (UseMI->isDebugValue()) continue; if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { CopyUseMI = UseMI; continue; @@ -511,7 +512,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - if (MFI->hasCalls() && MF->hasMSInlineAsm()) + if (MFI->hasCalls() && MF->hasInlineAsm()) break; const MachineBasicBlock *MBB = I; @@ -522,8 +523,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); } - if (II->isMSInlineAsm()) { - MF->setHasMSInlineAsm(true); + if (II->isInlineAsm()) { + MF->setHasInlineAsm(true); } } } @@ -563,6 +564,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. FuncInfo->clear(); + DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + DEBUG(MF->print(dbgs())); + return true; } @@ -800,7 +804,7 @@ public: /// NodeDeleted - Handle nodes deleted from the graph. If the node being /// deleted is the current ISelPosition node, update ISelPosition. /// - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } @@ -1063,7 +1067,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) - FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); else FastIS->setLastLocalValue(0); } @@ -1071,7 +1075,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = llvm::prior(BI); + const Instruction *Inst = std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1092,7 +1096,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to fold the load if so. 
const Instruction *BeforeInst = Inst; while (BeforeInst != Begin) { - BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1100,7 +1104,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. - BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; } @@ -2194,8 +2198,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI, - unsigned ChildNo) { + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); @@ -2231,6 +2234,14 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2313,6 +2324,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); return Index; + case SelectionDAGISel::OPC_CheckChild0Integer: + case SelectionDAGISel::OPC_CheckChild1Integer: + case SelectionDAGISel::OPC_CheckChild2Integer: + case SelectionDAGISel::OPC_CheckChild3Integer: + case SelectionDAGISel::OPC_CheckChild4Integer: + Result = !::CheckChildInteger(Table, Index, N, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer); + return Index; case SelectionDAGISel::OPC_CheckAndImm: Result = !::CheckAndImm(Table, Index, N, SDISel); return Index; @@ -2693,6 +2712,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; continue; + case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: + case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: + case OPC_CheckChild4Integer: + if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, + Opcode-OPC_CheckChild0Integer)) break; + continue; case OPC_CheckAndImm: if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index b752b48..1483fdd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -15,12 +15,11 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/DebugInfo.h" #include 
"llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 82b068d..5de0b03 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" @@ -74,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); Alignment = CS->getParamAlignment(AttrIdx); } @@ -1115,6 +1117,54 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { (KnownOne.countPopulation() == 1); } +bool TargetLowering::isConstTrueVal(const SDNode *N) const { + if (!N) + return false; + + bool IsVec = false; + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + IsVec = true; + CN = BV->getConstantSplatValue(); + } + + switch (getBooleanContents(IsVec)) { + case UndefinedBooleanContent: + return CN->getAPIntValue()[0]; + case ZeroOrOneBooleanContent: + return CN->isOne(); + case ZeroOrNegativeOneBooleanContent: + return CN->isAllOnesValue(); + } + + llvm_unreachable("Invalid boolean contents"); +} + +bool TargetLowering::isConstFalseVal(const SDNode *N) const { + if (!N) + return false; + + bool IsVec = false; + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + IsVec = true; + CN = BV->getConstantSplatValue(); + } + + if (getBooleanContents(IsVec) == UndefinedBooleanContent) + return !CN->getAPIntValue()[0]; + + return CN->isNullValue(); +} + /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. SDValue @@ -1468,18 +1518,32 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT); + // X <= C0 --> X < (C0 + 1) + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, N1.getValueType()), + NewCC); + } } if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) @@ -1535,7 +1599,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -1565,7 +1629,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -1593,7 +1657,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalizeOps() ? + EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -2663,3 +2727,14 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } } + +bool TargetLowering:: +verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { + if (!isa<ConstantSDNode>(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return true; + } + + return false; +} diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index f769b44..1120be8 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : TD(TM.getDataLayout()) { + : DL(TM.getDataLayout()) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 10f64c7..adb3ef9 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -29,10 +29,10 @@ #include "llvm/CodeGen/GCs.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" using namespace llvm; @@ -55,8 +55,8 @@ namespace { public: ShadowStackGC(); - bool initializeCustomLowering(Module &M); - bool performCustomLowering(Function &F); + bool initializeCustomLowering(Module &M) override; + bool performCustomLowering(Function &F) override; private: bool IsNullValue(Value *V); diff --git 
a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index da2e710..dc7ca2b 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -60,11 +60,11 @@ class SjLjEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} - bool doInitialization(Module &M); - bool runOnFunction(Function &F); + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} - const char *getPassName() const { + void getAnalysisUsage(AnalysisUsage &AU) const override {} + const char *getPassName() const override { return "SJLJ Exception Handling preparation"; } @@ -149,7 +149,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// instruction with those returned by the personality function. void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); @@ -173,7 +173,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); IRBuilder<> Builder( - llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + std::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -251,7 +251,7 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { // Aggregate types can't be cast, but are legal argument types, so we have // to handle them differently. We use an extract/insert pair as a // lightweight method to achieve the same goal. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { + if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt); Instruction *NI = InsertValueInst::Create(AI, EI, 0); NI->insertAfter(EI); @@ -294,8 +294,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, if (Inst->use_empty()) continue; if (Inst->hasOneUse() && - cast<Instruction>(Inst->use_back())->getParent() == BB && - !isa<PHINode>(Inst->use_back())) + cast<Instruction>(Inst->user_back())->getParent() == BB && + !isa<PHINode>(Inst->user_back())) continue; // If this is an alloca in the entry block, it's not a real register @@ -306,11 +306,10 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction *, 16> Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getParent() != BB || isa<PHINode>(User)) - Users.push_back(User); + for (User *U : Inst->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) + Users.push_back(UI); } // Find all of the blocks that this value is live in. 
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 20049a8..a6c6261 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -129,7 +129,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { const unsigned Space = SlotIndex::InstrDist/2; assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); - IndexList::iterator startItr = prior(curItr); + IndexList::iterator startItr = std::prev(curItr); unsigned index = startItr->getIndex(); do { curItr->setIndex(index += Space); diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 10a93b7..5f73469 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -188,10 +188,10 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { // Compute total ingoing and outgoing block frequencies for all bundles. BlockFrequencies.resize(mf.getNumBlockIDs()); - MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { unsigned Num = I->getNumber(); - BlockFrequencies[Num] = MBFI.getBlockFreq(I); + BlockFrequencies[Num] = MBFI->getBlockFreq(I); } // We never change the function. @@ -221,7 +221,7 @@ void SpillPlacement::activate(unsigned n) { // Hopfield network. if (bundles->getBlocks(n).size() > 100) { nodes[n].BiasP = 0; - nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + nodes[n].BiasN = (MBFI->getEntryFreq() / 16); } } @@ -323,10 +323,12 @@ void SpillPlacement::iterate() { // affect the entire network in a single iteration. That means very fast // convergence, usually in a single iteration. for (unsigned iteration = 0; iteration != 10; ++iteration) { - // Scan backwards, skipping the last node which was just updated. + // Scan backwards, skipping the last node when iteration is not zero. When + // iteration is not zero, the last node was just updated. bool Changed = false; for (SmallVectorImpl<unsigned>::const_reverse_iterator I = - llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { + iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()), + E = Linked.rend(); I != E; ++I) { unsigned n = *I; if (nodes[n].update(nodes)) { Changed = true; @@ -340,7 +342,7 @@ void SpillPlacement::iterate() { // Scan forwards, skipping the first node which was just updated. Changed = false; for (SmallVectorImpl<unsigned>::const_iterator I = - llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { + std::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; if (nodes[n].update(nodes)) { Changed = true; diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index 105516b..a88d7ac 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -38,12 +38,14 @@ class BitVector; class EdgeBundles; class MachineBasicBlock; class MachineLoopInfo; +class MachineBlockFrequencyInfo; class SpillPlacement : public MachineFunctionPass { struct Node; const MachineFunction *MF; const EdgeBundles *bundles; const MachineLoopInfo *loops; + const MachineBlockFrequencyInfo *MBFI; Node *nodes; // Nodes that are active in the current computation. 
Owned by the prepare() @@ -145,9 +147,9 @@ public: } private: - virtual bool runOnMachineFunction(MachineFunction&); - virtual void getAnalysisUsage(AnalysisUsage&) const; - virtual void releaseMemory(); + bool runOnMachineFunction(MachineFunction&) override; + void getAnalysisUsage(AnalysisUsage&) const override; + void releaseMemory() override; void activate(unsigned); }; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index d5b3a4a..094641c 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -89,8 +89,9 @@ protected: unsigned ss = vrm->assignVirt2StackSlot(li->reg); // Iterate over reg uses/defs. - for (MachineRegisterInfo::reg_iterator - regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) { + for (MachineRegisterInfo::reg_instr_iterator + regItr = mri->reg_instr_begin(li->reg); + regItr != mri->reg_instr_end();) { // Grab the use/def instr. MachineInstr *mi = &*regItr; @@ -98,9 +99,7 @@ protected: DEBUG(dbgs() << " Processing " << *mi); // Step regItr to the next use/def instr. - do { - ++regItr; - } while (regItr != mri->reg_end() && (&*regItr == mi)); + ++regItr; // Collect uses & defs for this instr. SmallVector<unsigned, 2> indices; @@ -143,9 +142,9 @@ protected: if (hasDef) { MachineInstrSpan MIS(miItr); - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg, + tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg, true, ss, trc, tri); - lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end()); + lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end()); } } } @@ -164,7 +163,7 @@ public: VirtRegMap &vrm) : SpillerBase(pass, mf, vrm) {} - void spill(LiveRangeEdit &LRE) { + void spill(LiveRangeEdit &LRE) override { // Ignore spillIs - we don't use it. trivialSpillEverywhere(LRE); } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 68a15f7..16fe979 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -131,11 +131,9 @@ void SplitAnalysis::analyzeUses() { // Get use slots form the use-def chain. const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineRegisterInfo::use_nodbg_iterator - I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; - ++I) - if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); + for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg)) + if (!MO.isUndef()) + UseSlots.push_back(LIS.getInstructionIndex(MO.getParent()).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -188,7 +186,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { BlockInfo BI; BI.MBB = MFI; SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); // If the block contains no uses, the range must be live through. 
At one // point, RegisterCoalescer could create dangling ranges that ended @@ -509,7 +507,7 @@ SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { assert(MI && "enterIntvAfter called with invalid index"); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), - llvm::next(MachineBasicBlock::iterator(MI))); + std::next(MachineBasicBlock::iterator(MI))); return VNI->def; } @@ -570,7 +568,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { } VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(), - llvm::next(MachineBasicBlock::iterator(MI))); + std::next(MachineBasicBlock::iterator(MI))); return VNI->def; } @@ -888,7 +886,7 @@ bool SplitEditor::transferValues() { // LiveInBlocks. MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); SlotIndex BlockStart, BlockEnd; - tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { @@ -972,7 +970,7 @@ void SplitEditor::extendPHIKillRanges() { void SplitEditor::rewriteAssigned(bool ExtendRanges) { for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = RI.getOperand(); + MachineOperand &MO = *RI; MachineInstr *MI = MO.getParent(); ++RI; // LiveDebugVariables should have handled all DBG_VALUE instructions. @@ -1183,7 +1181,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, unsigned IntvIn, SlotIndex LeaveBefore, unsigned IntvOut, SlotIndex EnterAfter){ SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop << ") intf " << LeaveBefore << '-' << EnterAfter @@ -1286,7 +1284,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, unsigned IntvIn, SlotIndex LeaveBefore) { SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr @@ -1378,7 +1376,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, unsigned IntvOut, SlotIndex EnterAfter) { SlotIndex Start, Stop; - tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 3dbc050..7b1de85 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -30,7 +30,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -44,7 +43,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/DebugInfo.h" +#include "llvm/CodeGen/StackProtector.h" +#include 
"llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -112,37 +113,25 @@ class StackColoring : public MachineFunctionPass { SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; /// Maps liveness intervals for each slot. - SmallVector<LiveInterval*, 16> Intervals; + SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; /// SlotIndex analysis object. SlotIndexes *Indexes; + /// The stack protector object. + StackProtector *SP; /// The list of lifetime markers found. These markers are to be removed /// once the coloring is done. SmallVector<MachineInstr*, 8> Markers; - /// SlotSizeSorter - A Sort utility for arranging stack slots according - /// to their size. - struct SlotSizeSorter { - MachineFrameInfo *MFI; - SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { } - bool operator()(int LHS, int RHS) { - // We use -1 to denote a uninteresting slot. Place these slots at the end. - if (LHS == -1) return false; - if (RHS == -1) return true; - // Sort according to size. - return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); - } -}; - public: static char ID; StackColoring() : MachineFunctionPass(ID) { initializeStackColoringPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const; - bool runOnMachineFunction(MachineFunction &MF); + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; private: /// Debug. @@ -191,6 +180,7 @@ INITIALIZE_PASS_BEGIN(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) @@ -198,6 +188,7 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<SlotIndexes>(); + AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -253,18 +244,16 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); - for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); - BI != BE; ++BI) { - - if (BI->getOpcode() != TargetOpcode::LIFETIME_START && - BI->getOpcode() != TargetOpcode::LIFETIME_END) + for (MachineInstr &MI : **FI) { + if (MI.getOpcode() != TargetOpcode::LIFETIME_START && + MI.getOpcode() != TargetOpcode::LIFETIME_END) continue; - Markers.push_back(BI); + Markers.push_back(&MI); - bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &MI = BI->getOperand(0); - unsigned Slot = MI.getIndex(); + bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; + const MachineOperand &MO = MI.getOperand(0); + unsigned Slot = MO.getIndex(); MarkersFound++; @@ -310,11 +299,7 @@ void StackColoring::calculateLocalLiveness() { SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; - for (SmallVectorImpl<const MachineBasicBlock *>::iterator - PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); - PI != PE; ++PI) { - - const MachineBasicBlock *BB = *PI; + for (const MachineBasicBlock *BB : BasicBlockNumbering) { if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups. 
@@ -369,18 +354,14 @@ void StackColoring::calculateLocalLiveness() { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), - PE = BB->pred_end(); PI != PE; ++PI) - NextBBSet.insert(*PI); + NextBBSet.insert(BB->pred_begin(), BB->pred_end()); } if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - NextBBSet.insert(*SI); + NextBBSet.insert(BB->succ_begin(), BB->succ_end()); } } @@ -394,18 +375,15 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // For each block, find which slots are active within this block // and update the live intervals. - for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); - MBB != MBBe; ++MBB) { + for (const MachineBasicBlock &MBB : *MF) { Starts.clear(); Starts.resize(NumSlots); Finishes.clear(); Finishes.resize(NumSlots); // Create the interval for the basic blocks with lifetime markers in them. - for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(), - e = Markers.end(); it != e; ++it) { - const MachineInstr *MI = *it; - if (MI->getParent() != MBB) + for (const MachineInstr *MI : Markers) { + if (MI->getParent() != &MBB) continue; assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || @@ -429,14 +407,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } // Create the interval of the blocks that we previously found to be 'alive'. - BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB]; + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; pos = MBBLiveness.LiveIn.find_next(pos)) { - Starts[pos] = Indexes->getMBBStartIdx(MBB); + Starts[pos] = Indexes->getMBBStartIdx(&MBB); } for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; pos = MBBLiveness.LiveOut.find_next(pos)) { - Finishes[pos] = Indexes->getMBBEndIdx(MBB); + Finishes[pos] = Indexes->getMBBEndIdx(&MBB); } for (unsigned i = 0; i < NumSlots; ++i) { @@ -452,10 +430,10 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // We have a single consecutive region. Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); } else { - // We have two non consecutive regions. This happens when + // We have two non-consecutive regions. This happens when // LIFETIME_START appears after the LIFETIME_END marker. - SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); - SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); + SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); } @@ -465,8 +443,8 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { bool StackColoring::removeAllMarkers() { unsigned Count = 0; - for (unsigned i = 0; i < Markers.size(); ++i) { - Markers[i]->eraseFromParent(); + for (MachineInstr *MI : Markers) { + MI->eraseFromParent(); Count++; } Markers.clear(); @@ -482,64 +460,74 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { MachineModuleInfo *MMI = &MF->getMMI(); // Remap debug information that refers to stack slots. 
- MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); - for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), - VE = VMap.end(); VI != VE; ++VI) { - const MDNode *Var = VI->first; - if (!Var) continue; - std::pair<unsigned, DebugLoc> &VP = VI->second; - if (SlotRemap.count(VP.first)) { - DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n"); - VP.first = SlotRemap[VP.first]; + for (auto &VI : MMI->getVariableDbgInfo()) { + if (!VI.Var) + continue; + if (SlotRemap.count(VI.Slot)) { + DEBUG(dbgs()<<"Remapping debug info for ["<<VI.Var->getName()<<"].\n"); + VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } } // Keep a list of *allocas* which need to be remapped. DenseMap<const AllocaInst*, const AllocaInst*> Allocas; - for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(), - e = SlotRemap.end(); it != e; ++it) { - const AllocaInst *From = MFI->getObjectAllocation(it->first); - const AllocaInst *To = MFI->getObjectAllocation(it->second); + for (const std::pair<int, int> &SI : SlotRemap) { + const AllocaInst *From = MFI->getObjectAllocation(SI.first); + const AllocaInst *To = MFI->getObjectAllocation(SI.second); assert(To && From && "Invalid allocation object"); Allocas[From] = To; + + // AA might be used later for instruction scheduling, and we need it to be + // able to deduce the correct aliasing releationships between pointers + // derived from the alloca being remapped and the target of that remapping. + // The only safe way, without directly informing AA about the remapping + // somehow, is to directly update the IR to reflect the change being made + // here. + Instruction *Inst = const_cast<AllocaInst *>(To); + if (From->getType() != To->getType()) { + BitCastInst *Cast = new BitCastInst(Inst, From->getType()); + Cast->insertAfter(Inst); + Inst = Cast; + } + + // Allow the stack protector to adjust its value map to account for the + // upcoming replacement. + SP->adjustForColoring(From, To); + + // Note that this will not replace uses in MMOs (which we'll update below), + // or anywhere else (which is why we won't delete the original + // instruction). + const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst); } // Remap all instructions to the new stack slots. - MachineFunction::iterator BB, BBE; - MachineBasicBlock::iterator I, IE; - for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) - for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { - + for (MachineBasicBlock &BB : *MF) + for (MachineInstr &I : BB) { // Skip lifetime markers. We'll remove them soon. - if (I->getOpcode() == TargetOpcode::LIFETIME_START || - I->getOpcode() == TargetOpcode::LIFETIME_END) + if (I.getOpcode() == TargetOpcode::LIFETIME_START || + I.getOpcode() == TargetOpcode::LIFETIME_END) continue; // Update the MachineMemOperand to use the new alloca. - for (MachineInstr::mmo_iterator MM = I->memoperands_begin(), - E = I->memoperands_end(); MM != E; ++MM) { - MachineMemOperand *MMO = *MM; - + for (MachineMemOperand *MMO : I.memoperands()) { const Value *V = MMO->getValue(); if (!V) continue; - const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V); - if (PSV && PSV->isConstant(MFI)) - continue; + // FIXME: In order to enable the use of TBAA when using AA in CodeGen, + // we'll also need to update the TBAA nodes in MMOs with values + // derived from the merged allocas. 
When doing this, we'll need to use + // the same variant of GetUnderlyingObjects that is used by the + // instruction scheduler (that can look through ptrtoint/inttoptr + // pairs). - // Climb up and find the original alloca. - V = GetUnderlyingObject(V); - // If we did not find one, or if the one that we found is not in our - // map, then move on. - if (!V || !isa<AllocaInst>(V)) { - // Clear mem operand since we don't know for sure that it doesn't - // alias a merged alloca. - MMO->setValue(0); + // We've replaced IR-level uses of the remapped allocas, so we only + // need to replace direct uses here. + if (!isa<AllocaInst>(V)) continue; - } + const AllocaInst *AI= cast<AllocaInst>(V); if (!Allocas.count(AI)) continue; @@ -549,9 +537,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } // Update all of the machine instruction operands. - for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - MachineOperand &MO = I->getOperand(i); - + for (MachineOperand &MO : I.operands()) { if (!MO.isFI()) continue; int FromSlot = MO.getIndex(); @@ -572,12 +558,12 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // zone are are okay, despite the fact that we don't have a good way // for validating all of the usages of the calculation. #ifndef NDEBUG - bool TouchesMemory = I->mayLoad() || I->mayStore(); + bool TouchesMemory = I.mayLoad() || I.mayStore(); // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. - if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { - SlotIndex Index = Indexes->getInstructionIndex(I); - LiveInterval *Interval = Intervals[FromSlot]; + if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { + SlotIndex Index = Indexes->getInstructionIndex(&I); + const LiveInterval *Interval = &*Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && "Found instruction usage outside of live range."); } @@ -596,13 +582,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { } void StackColoring::removeInvalidSlotRanges() { - MachineFunction::const_iterator BB, BBE; - MachineBasicBlock::const_iterator I, IE; - for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) - for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { - - if (I->getOpcode() == TargetOpcode::LIFETIME_START || - I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue()) + for (MachineBasicBlock &BB : *MF) + for (MachineInstr &I : BB) { + if (I.getOpcode() == TargetOpcode::LIFETIME_START || + I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue()) continue; // Some intervals are suspicious! In some cases we find address @@ -611,13 +594,11 @@ void StackColoring::removeInvalidSlotRanges() { // violation, but address calculations are okay. This can happen when // GEPs are hoisted outside of the lifetime zone. // So, in here we only check instructions which can read or write memory. - if (!I->mayLoad() && !I->mayStore()) + if (!I.mayLoad() && !I.mayStore()) continue; // Check all of the machine operands. - for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - const MachineOperand &MO = I->getOperand(i); - + for (const MachineOperand &MO : I.operands()) { if (!MO.isFI()) continue; @@ -631,10 +612,10 @@ void StackColoring::removeInvalidSlotRanges() { // Check that the used slot is inside the calculated lifetime range. // If it is not, warn about it and invalidate the range. 
- LiveInterval *Interval = Intervals[Slot]; - SlotIndex Index = Indexes->getInstructionIndex(I); + LiveInterval *Interval = &*Intervals[Slot]; + SlotIndex Index = Indexes->getInstructionIndex(&I); if (Interval->find(Index) == Interval->end()) { - Intervals[Slot]->clear(); + Interval->clear(); DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); EscapedAllocas++; } @@ -659,12 +640,16 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, } bool StackColoring::runOnMachineFunction(MachineFunction &Func) { + if (skipOptnoneFunction(*Func.getFunction())) + return false; + DEBUG(dbgs() << "********** Stack Coloring **********\n" << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); MF = &Func; MFI = MF->getFrameInfo(); Indexes = &getAnalysis<SlotIndexes>(); + SP = &getAnalysis<StackProtector>(); BlockLiveness.clear(); BasicBlocks.clear(); BasicBlockNumbering.clear(); @@ -704,9 +689,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { } for (unsigned i=0; i < NumSlots; ++i) { - LiveInterval *LI = new LiveInterval(i, 0); - Intervals.push_back(LI); + std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0)); LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); + Intervals.push_back(std::move(LI)); SortedSlots.push_back(i); } @@ -741,7 +726,13 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Sort the slots according to their size. Place unused slots at the end. // Use stable sort to guarantee deterministic code generation. std::stable_sort(SortedSlots.begin(), SortedSlots.end(), - SlotSizeSorter(MFI)); + [this](int LHS, int RHS) { + // We use -1 to denote a uninteresting slot. Place these slots at the end. + if (LHS == -1) return false; + if (RHS == -1) return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + }); bool Changed = true; while (Changed) { @@ -756,8 +747,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int FirstSlot = SortedSlots[I]; int SecondSlot = SortedSlots[J]; - LiveInterval *First = Intervals[FirstSlot]; - LiveInterval *Second = Intervals[SecondSlot]; + LiveInterval *First = &*Intervals[FirstSlot]; + LiveInterval *Second = &*Intervals[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); // Merge disjoint slots. @@ -795,10 +786,5 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { expungeSlotMap(SlotRemap, NumSlots); remapInstructions(SlotRemap); - // Release the intervals. - for (unsigned I = 0; I < NumSlots; ++I) { - delete Intervals[I]; - } - return removeAllMarkers(); } diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp new file mode 100644 index 0000000..a374417 --- /dev/null +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -0,0 +1,128 @@ +//===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StackMap Liveness analysis pass. The pass calculates +// the liveness for each basic block in a function and attaches the register +// live-out information to a stackmap or patchpoint intrinsic if present. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackmaps"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
+  cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
+cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
+  cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+}
+
+STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
+STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
+STATISTIC(NumBBsVisited, "Number of basic blocks visited");
+STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
+STATISTIC(NumStackMaps, "Number of StackMaps visited");
+
+char StackMapLiveness::ID = 0;
+char &llvm::StackMapLivenessID = StackMapLiveness::ID;
+INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
+                "StackMap Liveness Analysis", false, false)
+
+/// Default construct and initialize the pass.
+StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) {
+  initializeStackMapLivenessPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
+  // We preserve all information.
+  AU.setPreservesAll();
+  AU.setPreservesCFG();
+  // Default dependencie for all MachineFunction passes.
+  AU.addRequired<MachineFunctionAnalysis>();
+}
+
+/// Calculate the liveness information for the given machine function.
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+  DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
+               << _MF.getName() << " **********\n");
+  MF = &_MF;
+  TRI = MF->getTarget().getRegisterInfo();
+  ++NumStackMapFuncVisited;
+
+  // Skip this function if there are no stackmaps or patchpoints to process.
+  if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) ||
+        (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) {
+    ++NumStackMapFuncSkipped;
+    return false;
+  }
+  return calculateLiveness();
+}
+
+/// Performs the actual liveness calculation for the function.
+bool StackMapLiveness::calculateLiveness() {
+  bool HasChanged = false;
+  // For all basic blocks in the function.
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n");
+    LiveRegs.init(TRI);
+    LiveRegs.addLiveOuts(MBBI);
+    bool HasStackMap = false;
+    // Reverse iterate over all instructions and add the current live register
+    // set to an instruction if we encounter a stackmap or patchpoint
+    // instruction.
+ for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), + E = MBBI->rend(); I != E; ++I) { + int Opc = I->getOpcode(); + if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) || + (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) { + addLiveOutSetToMI(*I); + HasChanged = true; + HasStackMap = true; + ++NumStackMaps; + } + DEBUG(dbgs() << " " << *I << " " << LiveRegs); + LiveRegs.stepBackward(*I); + } + ++NumBBsVisited; + if (!HasStackMap) + ++NumBBsHaveNoStackmap; + } + return HasChanged; +} + +/// Add the current register live set to the instruction. +void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) { + uint32_t *Mask = createRegisterMask(); + MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask); + MI.addOperand(*MF, MO); +} + +/// Create a register mask and initialize it with the registers from the +/// register live set. +uint32_t *StackMapLiveness::createRegisterMask() const { + // The mask is owned and cleaned up by the Machine Function. + uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs()); + for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); + RI != RE; ++RI) + Mask[*RI / 32] |= 1U << (*RI % 32); + return Mask; +} diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 40893ea..a6522dc 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -10,9 +10,11 @@ #define DEBUG_TYPE "stackmaps" #include "llvm/CodeGen/StackMaps.h" - #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -20,22 +22,20 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" - #include <iterator> using namespace llvm; -PatchPointOpers::PatchPointOpers(const MachineInstr *MI): - MI(MI), - HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) { - +PatchPointOpers::PatchPointOpers(const MachineInstr *MI) + : MI(MI), + HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) +{ #ifndef NDEBUG - { unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && MI->getOperand(CheckStartIdx).isDef() && @@ -43,8 +43,7 @@ PatchPointOpers::PatchPointOpers(const MachineInstr *MI): ++CheckStartIdx; assert(getMetaIdx() == CheckStartIdx && - "Unexpected additonal definition in Patchpoint intrinsic."); - } + "Unexpected additional definition in Patchpoint intrinsic."); #endif } @@ -65,7 +64,121 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { return ScratchIdx; } -void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, +MachineInstr::const_mop_iterator +StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + LocationVec &Locs, LiveOutVec &LiveOuts) const { + if (MOI->isImm()) { + switch (MOI->getImm()) { + default: llvm_unreachable("Unrecognized operand type."); + case 
StackMaps::DirectMemRefOp: { + unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + assert((Size % 8) == 0 && "Need pointer size in bytes."); + Size /= 8; + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + break; + } + case StackMaps::IndirectMemRefOp: { + int64_t Size = (++MOI)->getImm(); + assert(Size > 0 && "Need a valid size for indirect memory locations."); + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + break; + } + case StackMaps::ConstantOp: { + ++MOI; + assert(MOI->isImm() && "Expected constant operand."); + int64_t Imm = MOI->getImm(); + Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm)); + break; + } + } + return ++MOI; + } + + // The physical register number will ultimately be encoded as a DWARF regno. + // The stack map also records the size of a spill slot that can hold the + // register content. (The runtime can track the actual size of the data type + // if it needs to.) + if (MOI->isReg()) { + // Skip implicit registers (this includes our scratch registers) + if (MOI->isImplicit()) + return ++MOI; + + assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + "Virtreg operands should have been rewritten before now."); + const TargetRegisterClass *RC = + AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg()); + assert(!MOI->getSubReg() && "Physical subreg still around."); + Locs.push_back( + Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + return ++MOI; + } + + if (MOI->isRegLiveOut()) + LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut()); + + return ++MOI; +} + +/// Go up the super-register chain until we hit a valid dwarf register number. +static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNo = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned) RegNo; +} + +/// Create a live-out register record for the given register Reg. +StackMaps::LiveOutReg +StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { + unsigned RegNo = getDwarfRegNum(Reg, TRI); + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + return LiveOutReg(Reg, RegNo, Size); +} + +/// Parse the register live-out mask and return a vector of live-out registers +/// that need to be recorded in the stackmap. +StackMaps::LiveOutVec +StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { + assert(Mask && "No register mask specified"); + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + LiveOutVec LiveOuts; + + // Create a LiveOutReg for each bit that is set in the register mask. + for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) + if ((Mask[Reg / 32] >> Reg % 32) & 1) + LiveOuts.push_back(createLiveOutReg(Reg, TRI)); + + // We don't need to keep track of a register if its super-register is already + // in the list. Merge entries that refer to the same dwarf register and use + // the maximum size that needs to be spilled. 
+ std::sort(LiveOuts.begin(), LiveOuts.end()); + for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end(); + I != E; ++I) { + for (LiveOutVec::iterator II = std::next(I); II != E; ++II) { + if (I->RegNo != II->RegNo) { + // Skip all the now invalid entries. + I = --II; + break; + } + I->Size = std::max(I->Size, II->Size); + if (TRI->isSuperRegister(I->Reg, II->Reg)) + I->Reg = II->Reg; + II->MarkInvalid(); + } + } + LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(), + LiveOutReg::IsInvalid), LiveOuts.end()); + return LiveOuts; +} + +void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, bool recordResult) { @@ -74,71 +187,64 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, MCSymbol *MILabel = OutContext.CreateTempSymbol(); AP.OutStreamer.EmitLabel(MILabel); - LocationVec CallsiteLocs; + LocationVec Locations; + LiveOutVec LiveOuts; if (recordResult) { - std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = - OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM); - - Location &Loc = ParseResult.first; - assert(Loc.LocType == Location::Register && - "Stackmap return location must be a register."); - CallsiteLocs.push_back(Loc); + assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value."); + parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), + Locations, LiveOuts); } + // Parse operands. while (MOI != MOE) { - std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = - OpParser(MOI, MOE, AP.TM); - - Location &Loc = ParseResult.first; + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); + } - // Move large constants into the constant pool. - if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) { - Loc.LocType = Location::ConstantIndex; - Loc.Offset = ConstPool.getConstantIndex(Loc.Offset); + // Move large constants into the constant pool. + for (LocationVec::iterator I = Locations.begin(), E = Locations.end(); + I != E; ++I) { + // Constants are encoded as sign-extended integers. + // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. + if (I->LocType == Location::Constant && + ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) { + I->LocType = Location::ConstantIndex; + I->Offset = ConstPool.getConstantIndex(I->Offset); } - - CallsiteLocs.push_back(Loc); - MOI = ParseResult.second; } + // Create an expression to calculate the offset of the callsite from function + // entry. const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( MCSymbolRefExpr::Create(MILabel, OutContext), MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), OutContext); - CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs)); -} - -static MachineInstr::const_mop_iterator -getStackMapEndMOP(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE) { - for (; MOI != MOE; ++MOI) - if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit())) - break; + CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts)); - return MOI; + // Record the stack size of the current function. + const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + FnStackSize[AP.CurrentFnSym] = + MFI->hasVarSizedObjects() ? 
UINT64_MAX : MFI->getStackSize(); } void StackMaps::recordStackMap(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::STACKMAP && "exected stackmap"); + assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); int64_t ID = MI.getOperand(0).getImm(); - assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); - recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2), - getStackMapEndMOP(MI.operands_begin(), - MI.operands_end())); + recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2), + MI.operands_end()); } void StackMaps::recordPatchPoint(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "exected stackmap"); + assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + MachineInstr::const_mop_iterator MOI = - llvm::next(MI.operands_begin(), opers.getStackMapStartIdx()); - recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()), + std::next(MI.operands_begin(), opers.getStackMapStartIdx()); + recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG @@ -155,12 +261,21 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// serializeToStackMapSection conceptually populates the following fields: /// -/// uint32 : Reserved (header) +/// Header { +/// uint8 : Stack Map Version (currently 1) +/// uint8 : Reserved (expected to be 0) +/// uint16 : Reserved (expected to be 0) +/// } +/// uint32 : NumFunctions /// uint32 : NumConstants -/// int64 : Constants[NumConstants] /// uint32 : NumRecords +/// StkSizeRecord[NumFunctions] { +/// uint64 : Function Address +/// uint64 : Stack Size +/// } +/// int64 : Constants[NumConstants] /// StkMapRecord[NumRecords] { -/// uint32 : PatchPoint ID +/// uint64 : PatchPoint ID /// uint32 : Instruction Offset /// uint16 : Reserved (record flags) /// uint16 : NumLocations @@ -170,6 +285,14 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// uint16 : Dwarf RegNum /// int32 : Offset /// } +/// uint16 : Padding +/// uint16 : NumLiveOuts +/// LiveOuts[NumLiveOuts] { +/// uint16 : Dwarf RegNum +/// uint8 : Reserved +/// uint8 : Size in Bytes +/// } +/// uint32 : Padding (only if required to align to 8 byte) /// } /// /// Location Encoding, Type, Value: @@ -199,29 +322,42 @@ void StackMaps::serializeToStackMapSection() { // Serialize data. const char *WSMP = "Stack Maps: "; (void)WSMP; - const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo(); DEBUG(dbgs() << "********** Stack Map Output **********\n"); // Header. - AP.OutStreamer.EmitIntValue(0, 4); + AP.OutStreamer.EmitIntValue(1, 1); // Version. + AP.OutStreamer.EmitIntValue(0, 1); // Reserved. + AP.OutStreamer.EmitIntValue(0, 2); // Reserved. + // Num functions. + DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); + AP.OutStreamer.EmitIntValue(FnStackSize.size(), 4); // Num constants. + DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.getNumConstants() + << '\n'); AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4); + // Num callsites. + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); + AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); + + // Function stack size entries. 
+ for (FnStackSizeMap::iterator I = FnStackSize.begin(), E = FnStackSize.end(); + I != E; ++I) { + AP.OutStreamer.EmitSymbolValue(I->first, 8); + AP.OutStreamer.EmitIntValue(I->second, 8); + } // Constant pool entries. for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i) AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8); - DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n"); - AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); - + // Callsite entries. for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(), - CSIE = CSInfos.end(); - CSII != CSIE; ++CSII) { - - unsigned CallsiteID = CSII->ID; + CSIE = CSInfos.end(); CSII != CSIE; ++CSII) { + uint64_t CallsiteID = CSII->ID; const LocationVec &CSLocs = CSII->Locations; + const LiveOutVec &LiveOuts = CSII->LiveOuts; DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); @@ -229,15 +365,18 @@ void StackMaps::serializeToStackMapSection() { // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other // compilation errors this way. - if (CSLocs.size() > UINT16_MAX) { - AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID. + if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) { + AP.OutStreamer.EmitIntValue(UINT64_MAX, 8); // Invalid ID. AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); AP.OutStreamer.EmitIntValue(0, 2); // Reserved. AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. + AP.OutStreamer.EmitIntValue(0, 2); // padding. + AP.OutStreamer.EmitIntValue(0, 2); // 0 live-out registers. + AP.OutStreamer.EmitIntValue(0, 4); // padding. continue; } - AP.OutStreamer.EmitIntValue(CallsiteID, 4); + AP.OutStreamer.EmitIntValue(CallsiteID, 8); AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); // Reserved for flags. @@ -251,6 +390,26 @@ void StackMaps::serializeToStackMapSection() { for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); LocI != LocE; ++LocI, ++operIdx) { const Location &Loc = *LocI; + unsigned RegNo = 0; + int Offset = Loc.Offset; + if(Loc.Reg) { + RegNo = getDwarfRegNum(Loc.Reg, TRI); + + // If this is a register location, put the subregister byte offset in + // the location offset. 
+ if (Loc.LocType == Location::Register) { + assert(!Loc.Offset && "Register location should have zero offset"); + unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg); + if (SubRegIdx) + Offset = TRI->getSubRegIdxOffset(SubRegIdx); + } + } + else { + assert(Loc.LocType != Location::Register && + "Missing location register"); + } + DEBUG( dbgs() << WSMP << " Loc " << operIdx << ": "; switch (Loc.LocType) { @@ -258,15 +417,15 @@ void StackMaps::serializeToStackMapSection() { dbgs() << "<Unprocessed operand>"; break; case Location::Register: - dbgs() << "Register " << MCRI.getName(Loc.Reg); + dbgs() << "Register " << TRI->getName(Loc.Reg); break; case Location::Direct: - dbgs() << "Direct " << MCRI.getName(Loc.Reg); + dbgs() << "Direct " << TRI->getName(Loc.Reg); if (Loc.Offset) dbgs() << " + " << Loc.Offset; break; case Location::Indirect: - dbgs() << "Indirect " << MCRI.getName(Loc.Reg) + dbgs() << "Indirect " << TRI->getName(Loc.Reg) << " + " << Loc.Offset; break; case Location::Constant: @@ -276,36 +435,39 @@ void StackMaps::serializeToStackMapSection() { dbgs() << "Constant Index " << Loc.Offset; break; } - dbgs() << "\n"; + dbgs() << " [encoding: .byte " << Loc.LocType + << ", .byte " << Loc.Size + << ", .short " << RegNo + << ", .int " << Offset << "]\n"; ); - unsigned RegNo = 0; - int Offset = Loc.Offset; - if(Loc.Reg) { - RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); - for (MCSuperRegIterator SR(Loc.Reg, TRI); - SR.isValid() && (int)RegNo < 0; ++SR) { - RegNo = TRI->getDwarfRegNum(*SR, false); - } - // If this is a register location, put the subregister byte offset in - // the location offset. - if (Loc.LocType == Location::Register) { - assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); - if (SubRegIdx) - Offset = MCRI.getSubRegIdxOffset(SubRegIdx); - } - } - else { - assert(Loc.LocType != Location::Register && - "Missing location register"); - } AP.OutStreamer.EmitIntValue(Loc.LocType, 1); AP.OutStreamer.EmitIntValue(Loc.Size, 1); AP.OutStreamer.EmitIntValue(RegNo, 2); AP.OutStreamer.EmitIntValue(Offset, 4); } + + DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() + << " live-out registers\n"); + + // Num live-out registers and padding to align to 4 byte. + AP.OutStreamer.EmitIntValue(0, 2); + AP.OutStreamer.EmitIntValue(LiveOuts.size(), 2); + + operIdx = 0; + for (LiveOutVec::const_iterator LI = LiveOuts.begin(), LE = LiveOuts.end(); + LI != LE; ++LI, ++operIdx) { + DEBUG(dbgs() << WSMP << " LO " << operIdx << ": " + << TRI->getName(LI->Reg) + << " [encoding: .short " << LI->RegNo + << ", .byte 0, .byte " << LI->Size << "]\n"); + + AP.OutStreamer.EmitIntValue(LI->RegNo, 2); + AP.OutStreamer.EmitIntValue(0, 1); + AP.OutStreamer.EmitIntValue(LI->Size, 1); + } + // Emit alignment to 8 byte. 
+ AP.OutStreamer.EmitValueToAlignment(8); } AP.OutStreamer.AddBlankLine(); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 9020449..f3749e5 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -16,12 +16,11 @@ #define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/StackProtector.h" -#include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -58,10 +57,33 @@ StackProtector::getSSPLayout(const AllocaInst *AI) const { return AI ? Layout.lookup(AI) : SSPLK_None; } +void StackProtector::adjustForColoring(const AllocaInst *From, + const AllocaInst *To) { + // When coloring replaces one alloca with another, transfer the SSPLayoutKind + // tag from the remapped to the target alloca. The remapped alloca should + // have a size smaller than or equal to the replacement alloca. + SSPLayoutMap::iterator I = Layout.find(From); + if (I != Layout.end()) { + SSPLayoutKind Kind = I->second; + Layout.erase(I); + + // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite + // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that + // SSPLK_SmallArray does not overwrite SSPLK_LargeArray. + I = Layout.find(To); + if (I == Layout.end()) + Layout.insert(std::make_pair(To, Kind)); + else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf) + I->second = Kind; + } +} + bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); - DT = getAnalysisIfAvailable<DominatorTree>(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? 
&DTWP->getDomTree() : 0; TLI = TM->getTargetLowering(); if (!RequiresStackProtector()) @@ -69,8 +91,9 @@ bool StackProtector::runOnFunction(Function &Fn) { Attribute Attr = Fn.getAttributes().getAttribute( AttributeSet::FunctionIndex, "stack-protector-buffer-size"); - if (Attr.isStringAttribute()) - Attr.getValueAsString().getAsInteger(10, SSPBufferSize); + if (Attr.isStringAttribute() && + Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) + return false; // Invalid integer string ++NumFunProtected; return InsertStackProtectors(); @@ -127,9 +150,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, } bool StackProtector::HasAddressTaken(const Instruction *AI) { - for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { - const User *U = *UI; + for (const User *U : AI->users()) { if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (AI == SI->getValueOperand()) return true; @@ -261,8 +282,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, const unsigned MaxSearch = 4; bool NoInterposingChain = true; - for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()), - E = BB->rend(); + for (BasicBlock::reverse_iterator I = std::next(BB->rbegin()), E = BB->rend(); I != E && SearchCounter < MaxSearch; ++I) { Instruction *Inst = &*I; diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 9f44df8..2717f4c 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -87,7 +87,7 @@ namespace { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); @@ -98,7 +98,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: void InitializeSlots(); @@ -142,7 +142,6 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -157,7 +156,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); } for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { @@ -386,8 +385,8 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { toErase.push_back(I); continue; } - - MachineBasicBlock::iterator NextMI = llvm::next(I); + + MachineBasicBlock::iterator NextMI = std::next(I); if (NextMI == MBB->end()) continue; unsigned LoadReg = 0; diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index ff0181e..3b7a04c 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -15,10 +15,10 @@ #define DEBUG_TYPE "tailduplication" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include 
"llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -61,9 +61,10 @@ namespace { class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineBranchProbabilityInfo *MBPI; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; - OwningPtr<RegScavenger> RS; + std::unique_ptr<RegScavenger> RS; bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. @@ -78,7 +79,9 @@ namespace { explicit TailDuplicatePass() : MachineFunctionPass(ID), PreRegAlloc(false) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; private: void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -128,10 +131,15 @@ INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + PreRegAlloc = MRI->isSSA(); RS.reset(); if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) @@ -144,6 +152,11 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } +void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = I; @@ -252,8 +265,8 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, // Rewrite uses that are outside of the original def's block. MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); while (UI != MRI->use_end()) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); ++UI; if (UseMI->isDebugValue()) { // SSAUpdate can replace the use with an undef. That creates @@ -328,12 +341,10 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isDebugValue()) + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { + if (UseMI.isDebugValue()) continue; - if (UseMI->getParent() != BB) + if (UseMI.getParent() != BB) return true; } return false; @@ -686,7 +697,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB)); + MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB)); // Make PredFBB explicit. 
if (PredCond.empty()) @@ -721,11 +732,12 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, if (PredTBB) TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB); PredBB->removeSuccessor(TailBB); unsigned NumSuccessors = PredBB->succ_size(); assert(NumSuccessors <= 1); if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) - PredBB->addSuccessor(NewTarget); + PredBB->addSuccessor(NewTarget, Weight); TDBBs.push_back(PredBB); } @@ -786,7 +798,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Update PredBB livein. RS->enterBasicBlock(PredBB); if (!PredBB->empty()) - RS->forward(prior(PredBB->end())); + RS->forward(std::prev(PredBB->end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), @@ -836,7 +848,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I); + PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); Changed = true; ++NumTailDups; @@ -845,7 +857,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index bf4fd65..cae3ccd 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -13,10 +13,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" @@ -372,6 +374,65 @@ canFoldMemoryOperand(const MachineInstr *MI, return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } +static MachineInstr* foldPatchpoint(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex, + const TargetInstrInfo &TII) { + unsigned StartIdx = 0; + switch (MI->getOpcode()) { + case TargetOpcode::STACKMAP: + StartIdx = 2; // Skip ID, nShadowBytes. + break; + case TargetOpcode::PATCHPOINT: { + // For PatchPoint, the call args are not foldable. + PatchPointOpers opers(MI); + StartIdx = opers.getVarIdx(); + break; + } + default: + llvm_unreachable("unexpected stackmap opcode"); + } + + // Return false if any operands requested for folding are not foldable (not + // part of the stackmap's live values). 
+ for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); + I != E; ++I) { + if (*I < StartIdx) + return 0; + } + + MachineInstr *NewMI = + MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); + MachineInstrBuilder MIB(MF, NewMI); + + // No need to fold return, the meta data, and function arguments + for (unsigned i = 0; i < StartIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + + for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { + unsigned SpillSize; + unsigned SpillOffset; + // Compute the spill slot size and offset. + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(MO.getReg()); + bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, + SpillOffset, &MF.getTarget()); + if (!Valid) + report_fatal_error("cannot spill patchpoint subregister operand"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(SpillSize); + MIB.addFrameIndex(FrameIndex); + MIB.addImm(SpillOffset); + } + else + MIB.addOperand(MO); + } + return NewMI; +} + /// foldMemoryOperand - Attempt to fold a load or store of the specified stack /// slot into the specified machine instruction for the specified operand(s). /// If this is possible, a new instruction is returned with the specified @@ -393,8 +454,18 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); - // Ask the target to do the actual folding. - if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + MachineInstr *NewMI = 0; + + if (MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); + } else { + // Ask the target to do the actual folding. + NewMI =foldMemoryOperandImpl(MF, MI, Ops, FI); + } + + if (NewMI) { NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || @@ -450,7 +521,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. - MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + MachineInstr *NewMI = 0; + int FrameIndex = 0; + + if ((MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) && + isLoadFromStackSlot(LoadMI, FrameIndex)) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + } + if (!NewMI) return 0; NewMI = MBB.insert(MI, NewMI); @@ -562,7 +645,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // Terminators and labels can't be scheduled around. 
- if (MI->isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isPosition()) return true; // Don't attempt to schedule around any instruction that defines diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 30305af..870370b 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -18,11 +18,15 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -201,6 +205,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::FLOOR_F80] = "floorl"; Names[RTLIB::FLOOR_F128] = "floorl"; Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; Names[RTLIB::COPYSIGN_F32] = "copysignf"; Names[RTLIB::COPYSIGN_F64] = "copysign"; Names[RTLIB::COPYSIGN_F80] = "copysignl"; @@ -659,21 +668,23 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) { initActions(); // Perform these initializations only once. 
- IsLittleEndian = TD->isLittleEndian(); + IsLittleEndian = DL->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; + HasMultipleConditionRegisters = false; IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; + MaskAndBranchFoldingIsLegal = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -754,6 +765,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FCEIL, MVT::f16, Expand); setOperationAction(ISD::FRINT, MVT::f16, Expand); setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FROUND, MVT::f16, Expand); setOperationAction(ISD::FLOG , MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); @@ -764,6 +776,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FCEIL, MVT::f32, Expand); setOperationAction(ISD::FRINT, MVT::f32, Expand); setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FROUND, MVT::f32, Expand); setOperationAction(ISD::FLOG , MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); setOperationAction(ISD::FLOG10, MVT::f64, Expand); @@ -774,6 +787,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FROUND, MVT::f64, Expand); setOperationAction(ISD::FLOG , MVT::f128, Expand); setOperationAction(ISD::FLOG2, MVT::f128, Expand); setOperationAction(ISD::FLOG10, MVT::f128, Expand); @@ -784,6 +798,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FCEIL, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + setOperationAction(ISD::FROUND, MVT::f128, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -799,7 +814,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { } unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { - return TD->getPointerSizeInBits(AS); + return DL->getPointerSizeInBits(AS); } unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { @@ -808,7 +823,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { } MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize(0)); + return MVT::getIntegerVT(8*DL->getPointerSize(0)); } EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { @@ -894,6 +909,59 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { return false; } +/// Replace/modify any TargetFrameIndex operands with a targte-dependent +/// sequence of memory operands that is recognized by PrologEpilogInserter. +MachineBasicBlock* +TargetLoweringBase::emitPatchPoint(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + MachineFunction &MF = *MI->getParent()->getParent(); + + // MI changes inside this loop as we grow operands. 
+ for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { + MachineOperand &MO = MI->getOperand(OperIdx); + if (!MO.isFI()) + continue; + + // foldMemoryOperand builds a new MI after replacing a single FI operand + // with the canonical set of five x86 addressing-mode operands. + int FI = MO.getIndex(); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc()); + + // Copy operands before the frame-index. + for (unsigned i = 0; i < OperIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + // Add frame index operands: direct-mem-ref tag, #FI, offset. + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + // Copy the operands after the frame index. + for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + + // Inherit previous memory operands. + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); + + // Add a new memory operand for this FI. + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + TM.getDataLayout()->getPointerSize(), + MFI.getObjectAlignment(FI)); + MIB->addMemOperand(MF, MMO); + + // Replace the instruction and update the operand index. + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1; + MI->eraseFromParent(); + MI = MIB; + } + return MBB; +} + /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". std::pair<const TargetRegisterClass*, uint8_t> @@ -1019,7 +1087,7 @@ void TargetLoweringBase::computeRegisterProperties() { // that wider vector type. MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { + if (NElts != 1 && !shouldSplitVectorType(VT)) { bool IsLegalWiderType = false; // First try to promote the elements of integer vectors. If no legal // promotion was found, fallback to the widen-vector method. @@ -1087,7 +1155,7 @@ void TargetLoweringBase::computeRegisterProperties() { for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; - tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); RepRegClassForVT[i] = RRC; RepRegClassCostForVT[i] = Cost; } @@ -1230,7 +1298,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return TD->getCallFrameTypeAlignment(Ty); + return DL->getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// @@ -1364,6 +1432,8 @@ bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM, return false; // Allow 2*r as r+r. 
break; + default: // Don't allow n * r + return false; } return true; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 59d7b57..e41fbfc 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -34,7 +35,7 @@ #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; using namespace dwarf; @@ -43,19 +44,18 @@ using namespace dwarf; // ELF //===----------------------------------------------------------------------===// -MCSymbol * -TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, - Mangler *Mang, - MachineModuleInfo *MMI) const { +MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); switch (Encoding & 0x70) { default: report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: - return getSymbol(*Mang, GV); + return TM.getSymbol(GV, Mang); case dwarf::DW_EH_PE_pcrel: { return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - getSymbol(*Mang, GV)->getName()); + TM.getSymbol(GV, Mang)->getName()); } } } @@ -87,24 +87,21 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Streamer.EmitSymbolValue(Sym, Size); } -const MCExpr *TargetLoweringObjectFileELF:: -getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const { +const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { if (Encoding & dwarf::DW_EH_PE_indirect) { MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += ".DW.stub"; + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM); // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. 
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = getSymbol(*Mang, GV); + MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -114,7 +111,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer); + getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer); } static SectionKind @@ -199,10 +196,9 @@ getELFSectionFlags(SectionKind K) { return Flags; } - -const MCSection *TargetLoweringObjectFileELF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { +const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { StringRef SectionName = GV->getSection(); // Infer section flags from the section name if we can. @@ -235,7 +231,7 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) { const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { // If we have -ffunction-section or -fdata-section then we should emit the // global value to a uniqued section specifically for it. bool EmitUniquedSection; @@ -252,12 +248,12 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Prefix = getSectionPrefixForGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - MCSymbol *Sym = getSymbol(*Mang, GV); - Name.append(Sym->getName().begin(), Sym->getName().end()); + TM.getNameWithPrefix(Name, GV, Mang, true); + StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); if (GV->isWeakForLinker()) { - Group = Sym->getName(); + Group = Name.substr(strlen(Prefix)); Flags |= ELF::SHF_GROUP; } @@ -405,11 +401,21 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// +/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker +/// option string. Returns StringRef() if the option does not specify a library. +StringRef TargetLoweringObjectFileMachO:: +getDepLibFromLinkerOpt(StringRef LinkerOption) const { + const char *LibCmd = "-l"; + if (LinkerOption.startswith(LibCmd)) + return LinkerOption.substr(strlen(LibCmd)); + return StringRef(); +} + /// emitModuleFlags - Perform code emission for module flags. void TargetLoweringObjectFileMachO:: emitModuleFlags(MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; MDNode *LinkerOptions = 0; @@ -481,9 +487,9 @@ emitModuleFlags(MCStreamer &Streamer, Streamer.AddBlankLine(); } -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { +const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { // Parse the section specifier and create it if valid. 
StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; @@ -520,9 +526,44 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return S; } +bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( + const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + + // Sections holding 1 byte strings are atomized based on the data + // they contain. + // Sections holding 2 byte strings require symbols in order to be + // atomized. + // There is no dedicated section for 4 byte strings. + if (SMO.getKind().isMergeable1ByteCString()) + return false; + + if (SMO.getSegmentName() == "__DATA" && + SMO.getSectionName() == "__cfstring") + return false; + + switch (SMO.getType()) { + default: + return true; + + // These sections are atomized at the element boundaries without using + // symbols. + case MachO::S_4BYTE_LITERALS: + case MachO::S_8BYTE_LITERALS: + case MachO::S_16BYTE_LITERALS: + case MachO::S_LITERAL_POINTERS: + case MachO::S_NON_LAZY_SYMBOL_POINTERS: + case MachO::S_LAZY_SYMBOL_POINTERS: + case MachO::S_MOD_INIT_FUNC_POINTERS: + case MachO::S_MOD_TERM_FUNC_POINTERS: + case MachO::S_INTERPOSING: + return false; + } +} + const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; @@ -556,7 +597,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return FourByteConstantSection; if (Kind.isMergeableConst8()) return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) + if (Kind.isMergeableConst16()) return SixteenByteConstantSection; } @@ -595,55 +636,31 @@ TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { return FourByteConstantSection; if (Kind.isMergeableConst8()) return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) + if (Kind.isMergeableConst16()) return SixteenByteConstantSection; return ReadOnlySection; // .const } -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. 
- MCSymbol *Sym = getSymbol(*Mang, GV); - if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l') - return false; - } - - return true; -} - -const MCExpr *TargetLoweringObjectFileMachO:: -getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const { +const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { // The mach-o version of this method defaults to returning a stub reference. if (Encoding & DW_EH_PE_indirect) { MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; + MCSymbol *SSym = + getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = getSymbol(*Mang, GV); + MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -652,27 +669,24 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } - return TargetLoweringObjectFile:: - getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer); + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang, + TM, MMI, Streamer); } -MCSymbol *TargetLoweringObjectFileMachO:: -getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI) const { +MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { // The mach-o version of this method defaults to returning a stub reference. MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. 
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { - MCSymbol *Sym = getSymbol(*Mang, GV); + MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -718,58 +732,65 @@ getCOFFSectionFlags(SectionKind K) { return Flags; } -const MCSection *TargetLoweringObjectFileCOFF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { +const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind); - SmallString<128> Name(GV->getSection().c_str()); + StringRef Name = GV->getSection(); + StringRef COMDATSymName = ""; if (GV->isWeakForLinker()) { Selection = COFF::IMAGE_COMDAT_SELECT_ANY; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - Name.append("$"); - Mang->getNameWithPrefix(Name, GV, false, false); + MCSymbol *Sym = TM.getSymbol(GV, Mang); + COMDATSymName = Sym->getName(); } return getContext().getCOFFSection(Name, Characteristics, Kind, - "", + COMDATSymName, Selection); } -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { +static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) - return ".text$"; + return ".text"; if (Kind.isBSS ()) - return ".bss$"; - if (Kind.isThreadLocal()) { - // 'LLVM' is just an arbitary string to ensure that the section name gets - // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker. - return ".tls$LLVM"; - } + return ".bss"; + if (Kind.isThreadLocal()) + return ".tls$"; if (Kind.isWriteable()) - return ".data$"; - return ".rdata$"; + return ".data"; + return ".rdata"; } const MCSection *TargetLoweringObjectFileCOFF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { + // If we have -ffunction-sections then we should emit the global value to a + // uniqued section specifically for it. + bool EmitUniquedSection; + if (Kind.isText()) + EmitUniquedSection = TM.getFunctionSections(); + else + EmitUniquedSection = TM.getDataSections(); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false, false); - + // Section names depend on the name of the symbol which is not feasible if the + // symbol has private linkage. + if ((GV->isWeakForLinker() || EmitUniquedSection) && + !GV->hasPrivateLinkage()) { + const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - - return getContext().getCOFFSection(Name.str(), Characteristics, - Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY); + MCSymbol *Sym = TM.getSymbol(GV, Mang); + return getContext().getCOFFSection( + Name, Characteristics, Kind, Sym->getName(), + GV->isWeakForLinker() ? 
COFF::IMAGE_COMDAT_SELECT_ANY + : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES); } if (Kind.isText()) @@ -787,10 +808,18 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } +StringRef TargetLoweringObjectFileCOFF:: +getDepLibFromLinkerOpt(StringRef LinkerOption) const { + const char *LibCmd = "/DEFAULTLIB:"; + if (LinkerOption.startswith(LibCmd)) + return LinkerOption.substr(strlen(LibCmd)); + return StringRef(); +} + void TargetLoweringObjectFileCOFF:: emitModuleFlags(MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - Mangler *Mang, const TargetMachine &TM) const { + Mangler &Mang, const TargetMachine &TM) const { MDNode *LinkerOptions = 0; // Look for the "Linker Options" flag, since it's the only one we support. diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index f7bf86b..3ca2017 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Function.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index b9a6b47..d9e5aae 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -144,7 +144,7 @@ public: initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); @@ -156,7 +156,7 @@ public: } /// runOnMachineFunction - Pass entry point. - bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace @@ -229,7 +229,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SavedReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); + MachineOperand &UseMO = *UI; if (!UseMO.isKill()) continue; KillMI = UseMO.getParent(); @@ -255,7 +255,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, ++KillPos; unsigned NumVisited = 0; - for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { + for (MachineBasicBlock::iterator I = std::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; // DBG_VALUE cannot be counted against the limit. if (OtherMI->isDebugValue()) @@ -315,9 +315,7 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); + for (MachineOperand &MO : MRI->reg_operands(Reg)) { MachineInstr *MI = MO.getParent(); if (MI->getParent() != MBB || MI->isDebugValue()) continue; @@ -417,9 +415,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just // go with what the kill flag says. 
- if (llvm::next(Begin) != MRI->def_end()) + if (std::next(Begin) != MRI->def_end()) return true; - DefMI = &*Begin; + DefMI = Begin->getParent(); bool IsSrcPhys, IsDstPhys; unsigned SrcReg, DstReg; // If the def is something other than a copy, then it isn't going to @@ -457,7 +455,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, if (!MRI->hasOneNonDBGUse(Reg)) // None or more than one use. return 0; - MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg); + MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); if (UseMI.getParent() != MBB) return 0; unsigned SrcReg; @@ -647,7 +645,7 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, if (!Sunk) { DistanceMap.insert(std::make_pair(NewMI, Dist)); mi = NewMI; - nmi = llvm::next(mi); + nmi = std::next(mi); } // Update source and destination register maps. @@ -816,7 +814,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Move the copies connected to MI down as well. MachineBasicBlock::iterator Begin = MI; - MachineBasicBlock::iterator AfterMI = llvm::next(Begin); + MachineBasicBlock::iterator AfterMI = std::next(Begin); MachineBasicBlock::iterator End = AfterMI; while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { @@ -876,7 +874,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } // Move debug info as well. - while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue()) + while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue()) --Begin; nmi = End; @@ -891,7 +889,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, LIS->handleMove(CopyMI); InsertPos = CopyMI; } - End = llvm::next(MachineBasicBlock::iterator(MI)); + End = std::next(MachineBasicBlock::iterator(MI)); } // Copies following MI may have been moved as well. @@ -914,19 +912,17 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, /// instruction too close to the defs of its register dependencies. bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI) { - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), - DE = MRI->def_end(); DI != DE; ++DI) { - MachineInstr *DefMI = &*DI; - if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike()) continue; - if (DefMI == MI) + if (&DefMI == MI) return true; // MI is defining something KillMI uses - DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI); + DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(&DefMI); if (DDI == DistanceMap.end()) return true; // Below MI unsigned DefDist = DDI->second; assert(Dist > DefDist && "Visited def already?"); - if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) + if (TII->getInstrLatency(InstrItins, &DefMI) > (Dist - DefDist)) return true; } return false; @@ -1060,15 +1056,15 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Move the old kill above MI, don't forget to move debug info as well. 
MachineBasicBlock::iterator InsertPos = mi; - while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue()) --InsertPos; MachineBasicBlock::iterator From = KillMI; - MachineBasicBlock::iterator To = llvm::next(From); - while (llvm::prior(From)->isDebugValue()) + MachineBasicBlock::iterator To = std::next(From); + while (std::prev(From)->isDebugValue()) --From; MBB->splice(InsertPos, MBB, From, To); - nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. + nmi = std::prev(InsertPos); // Backtrack so we process the moved instr. DistanceMap.erase(DI); // Update live variables @@ -1317,13 +1313,14 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); // Deal with <undef> uses immediately - simply rewrite the src operand. - if (SrcMO.isUndef()) { + if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. if (TargetRegisterInfo::isVirtualRegister(DstReg)) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); SrcMO.setReg(DstReg); + SrcMO.setSubReg(0); DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); continue; } @@ -1349,6 +1346,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned LastCopiedReg = 0; SlotIndex LastCopyIdx; unsigned RegB = 0; + unsigned SubRegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; @@ -1359,6 +1357,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Grab RegB from the instruction because it may have changed if the // instruction was commuted. RegB = MI->getOperand(SrcIdx).getReg(); + SubRegB = MI->getOperand(SrcIdx).getSubReg(); if (RegA == RegB) { // The register is tied to multiple destinations (or else we would @@ -1383,8 +1382,25 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, #endif // Emit a copy. - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA); + // If this operand is folding a truncation, the truncation now moves to the + // copy so that the register classes remain valid for the operands. + MIB.addReg(RegB, 0, SubRegB); + const TargetRegisterClass *RC = MRI->getRegClass(RegB); + if (SubRegB) { + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), + SubRegB) && + "tied subregister must be a truncation"); + // The superreg class will not be used to constrain the subreg class. + RC = 0; + } + else { + assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) + && "tied subregister must be a truncation"); + } + } // Update DistanceMap. MachineBasicBlock::iterator PrevMI = MI; @@ -1404,7 +1420,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } - DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); MachineOperand &MO = MI->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && @@ -1417,9 +1433,12 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Make sure regA is a legal regclass for the SrcIdx operand. 
if (TargetRegisterInfo::isVirtualRegister(RegA) && TargetRegisterInfo::isVirtualRegister(RegB)) - MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); - + MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); + // The getMatchingSuper asserts guarantee that the register class projected + // by SubRegB is compatible with RegA with no subregister. So regardless of + // whether the dest oper writes a subreg, the source oper should not. + MO.setSubReg(0); // Propagate SrcRegMap. SrcRegMap[RegA] = RegB; @@ -1431,12 +1450,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); + MO.setSubReg(0); } } } @@ -1509,7 +1530,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { Processed.clear(); for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { - MachineBasicBlock::iterator nmi = llvm::next(mi); + MachineBasicBlock::iterator nmi = std::next(mi); if (mi->isDebugValue()) { mi = nmi; continue; @@ -1664,7 +1685,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { } MachineBasicBlock::iterator EndMBBI = - llvm::next(MachineBasicBlock::iterator(MI)); + std::next(MachineBasicBlock::iterator(MI)); if (!DefEmitted) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index f735ef2..2e22082 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -23,32 +23,32 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Support/CFG.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; namespace { class UnreachableBlockElim : public FunctionPass { - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; public: static char ID; // Pass identification, replacement for typeid UnreachableBlockElim() : FunctionPass(ID) { initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTree>(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); } }; } @@ -95,8 +95,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { namespace { class UnreachableMachineBlockElim : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; MachineModuleInfo *MMI; public: static char ID; // Pass identification, 
replacement for typeid diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index e0aa405..f892e94 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "LiveDebugVariables.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -160,6 +161,7 @@ class VirtRegRewriter : public MachineFunctionPass { SlotIndexes *Indexes; LiveIntervals *LIS; VirtRegMap *VRM; + SparseSet<unsigned> PhysRegs; void rewrite(); void addMBBLiveIns(); @@ -167,9 +169,9 @@ public: static char ID; VirtRegRewriter() : MachineFunctionPass(ID) {} - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace @@ -267,6 +269,20 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperKills; SmallPtrSet<const MachineInstr *, 4> NoReturnInsts; + // Here we have a SparseSet to hold which PhysRegs are actually encountered + // in the MF we are about to iterate over so that later when we call + // setPhysRegUsed, we are only doing it for physRegs that were actually found + // in the program and not for all of the possible physRegs for the given + // target architecture. If the target has a lot of physRegs, then for a small + // program there will be a significant compile time reduction here. + PhysRegs.clear(); + PhysRegs.setUniverse(TRI->getNumRegs()); + + // The function with uwtable should guarantee that the stack unwinder + // can unwind the stack to the previous frame. Thus, we can't apply the + // noreturn optimization if the caller function has uwtable attribute. + bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable); + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); @@ -276,9 +292,12 @@ void VirtRegRewriter::rewrite() { MachineInstr *MI = MII; ++MII; - // Check if this instruction is a call to a noreturn function. - // If so, all the definitions set by this instruction can be ignored. - if (IsExitBB && MI->isCall()) + // Check if this instruction is a call to a noreturn function. If this + // is a call to noreturn function and we don't need the stack unwinding + // functionality (i.e. this function does not have uwtable attribute and + // the callee function has the nounwind attribute), then we can ignore + // the definitions set by this instruction. + if (!HasUWTable && IsExitBB && MI->isCall()) { for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -294,6 +313,7 @@ void VirtRegRewriter::rewrite() { NoReturnInsts.insert(MI); break; } + } for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { @@ -303,6 +323,15 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + // If we encounter a VirtReg or PhysReg then get at the PhysReg and add + // it to the physreg bitset. Later we use only the PhysRegs that were + // actually encountered in the MF to populate the MRI's used physregs. + if (MO.isReg() && MO.getReg()) + PhysRegs.insert( + TargetRegisterInfo::isVirtualRegister(MO.getReg()) ? 
+ VRM->getPhys(MO.getReg()) : + MO.getReg()); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); @@ -376,20 +405,21 @@ void VirtRegRewriter::rewrite() { // Tell MRI about physical registers in use. if (NoReturnInsts.empty()) { - for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) - if (!MRI->reg_nodbg_empty(Reg)) - MRI->setPhysRegUsed(Reg); + for (SparseSet<unsigned>::iterator + RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI) + if (!MRI->reg_nodbg_empty(*RegI)) + MRI->setPhysRegUsed(*RegI); } else { - for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) { + for (SparseSet<unsigned>::iterator + I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) { + unsigned Reg = *I; if (MRI->reg_nodbg_empty(Reg)) continue; // Check if this register has a use that will impact the rest of the // code. Uses in debug and noreturn instructions do not impact the // generated code. - for (MachineRegisterInfo::reg_nodbg_iterator It = - MRI->reg_nodbg_begin(Reg), - EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) { - if (!NoReturnInsts.count(&(*It))) { + for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) { + if (!NoReturnInsts.count(&It)) { MRI->setPhysRegUsed(Reg); break; } @@ -397,3 +427,4 @@ } } } +
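The VirtRegMap.cpp hunks above replace the old loop over every physical register the target defines with a SparseSet that records only the physical registers actually encountered while rewriting the function. The following is a minimal standalone sketch of that idea, not LLVM code: SimpleSparseSet, NumTargetRegs, and the printed setPhysRegUsed calls are illustrative stand-ins for llvm::SparseSet, TRI->getNumRegs(), and MRI->setPhysRegUsed().

```cpp
// Sketch only: mimics the PhysRegs SparseSet optimization in
// VirtRegRewriter::rewrite(). We record registers as we see them and later
// iterate only over that small set, instead of scanning the whole register
// universe of the target.
#include <cstdint>
#include <iostream>
#include <vector>

namespace {
// A tiny sparse-set-like container over the universe [0, Universe):
// O(1) insert with deduplication, iteration visits only inserted members.
class SimpleSparseSet {
  std::vector<uint8_t> Present;  // sparse membership flags
  std::vector<unsigned> Dense;   // densely packed members, iteration order
public:
  explicit SimpleSparseSet(unsigned Universe) : Present(Universe, 0) {}
  void insert(unsigned Reg) {
    if (!Present[Reg]) {
      Present[Reg] = 1;
      Dense.push_back(Reg);
    }
  }
  const std::vector<unsigned> &members() const { return Dense; }
};
} // namespace

int main() {
  const unsigned NumTargetRegs = 1024; // a target with many physical registers
  SimpleSparseSet PhysRegs(NumTargetRegs);

  // Pretend these are the physical registers seen while walking a small
  // function's operands; a real pass would insert the mapped physical
  // register for each virtual or physical operand it visits.
  for (unsigned Reg : {3u, 17u, 3u, 64u, 17u})
    PhysRegs.insert(Reg);

  // Mark only the encountered registers as used, instead of looping over
  // all NumTargetRegs possibilities.
  for (unsigned Reg : PhysRegs.members())
    std::cout << "setPhysRegUsed(" << Reg << ")\n";
  return 0;
}
```

For a small function on a target with a large register file, iterating the handful of recorded registers rather than the full universe is where the compile-time saving described in the commit comes from.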