Diffstat (limited to 'lib/CodeGen')
140 files changed, 11193 insertions, 5917 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 91c1314..58b87e1 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -296,6 +296,16 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); + // FIXME: We must leave subregisters of live super registers as live, so that + // we don't clear out the register tracking information for subregisters of + // super registers we're still tracking (and with which we're unioning + // subregister definitions). + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) { + DEBUG(if (!header && footer) dbgs() << footer); + return; + } + if (!State->IsLive(Reg)) { KillIndices[Reg] = KillIdx; DefIndices[Reg] = ~0u; @@ -673,6 +683,21 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( goto next_super_reg; } + // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also + // defines 'NewReg' via an early-clobber operand. + auto Range = RegRefs.equal_range(Reg); + for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) { + auto UseMI = Q->second.Operand->getParent(); + int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI); + if (Idx == -1) + continue; + + if (UseMI->getOperand(Idx).isEarlyClobber()) { + DEBUG(dbgs() << "(ec)"); + goto next_super_reg; + } + } + // Record that 'Reg' can be renamed to 'NewReg'. RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg)); } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 9a3b790..e50b846 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -30,10 +30,9 @@ using namespace llvm; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// +/// Compute the linearized index of a member in a nested aggregate/struct/array +/// by recursing and accumulating CurIndex as long as there are indices in the +/// index list. unsigned llvm::ComputeLinearIndex(Type *Ty, const unsigned *Indices, const unsigned *IndicesEnd, @@ -52,16 +51,23 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex); } + assert(!Indices && "Unexpected out of bound"); return CurIndex; } // Given an array type, recursively traverse the elements. 
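// Illustration (not part of the patch): the linearized index counts leaf
// fields depth-first. Assuming a hypothetical %T = type { i32, [4 x i32], i32 },
// ComputeLinearIndex maps index paths as:
//   {0}    -> 0       (the leading i32)
//   {1, i} -> 1 + i   (EltLinearOffset of an i32 element is 1)
//   {2}    -> 5       (field 0 contributes 1, the array EltLinearOffset*NumElts = 4)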
else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Type *EltTy = ATy->getElementType(); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { - if (Indices && *Indices == i) - return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(EltTy, nullptr, nullptr, CurIndex); + unsigned NumElts = ATy->getNumElements(); + // Compute the linear offset when jumping over one element of the array + unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0); + if (Indices) { + assert(*Indices < NumElts && "Unexpected out of bound"); + // If the index is inside the array, compute the index of the requested + // elt and recurse into the element with the rest of the indices list + CurIndex += EltLinearOffset* *Indices; + return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); } + CurIndex += EltLinearOffset*NumElts; return CurIndex; } // We haven't found the type we're looking for, so keep searching. @@ -512,8 +518,9 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { return false; } + const Function *F = ExitBB->getParent(); return returnTypeIsEligibleForTailCall( - ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering()); + F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); } bool llvm::returnTypeIsEligibleForTailCall(const Function *F, diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk index 5cb351d..ec3cd77 100644 --- a/lib/CodeGen/Android.mk +++ b/lib/CodeGen/Android.mk @@ -24,6 +24,7 @@ codegen_SRC_FILES := \ ForwardControlFlowIntegrity.cpp \ GCMetadata.cpp \ GCMetadataPrinter.cpp \ + GCRootLowering.cpp \ GCStrategy.cpp \ GlobalMerge.cpp \ IfConversion.cpp \ @@ -95,6 +96,7 @@ codegen_SRC_FILES := \ ScheduleDAGPrinter.cpp \ ScoreboardHazardRecognizer.cpp \ ShadowStackGC.cpp \ + ShadowStackGCLowering.cpp \ SjLjEHPrepare.cpp \ SlotIndexes.cpp \ SpillPlacement.cpp \ @@ -104,6 +106,7 @@ codegen_SRC_FILES := \ StackMaps.cpp \ StackProtector.cpp \ StackSlotColoring.cpp \ + StatepointExampleGC.cpp \ TailDuplication.cpp \ TargetFrameLoweringImpl.cpp \ TargetInstrInfo.cpp \ @@ -114,7 +117,8 @@ codegen_SRC_FILES := \ TargetSchedule.cpp \ TwoAddressInstructionPass.cpp \ UnreachableBlockElim.cpp \ - VirtRegMap.cpp + VirtRegMap.cpp \ + WinEHPrepare.cpp # For the host # ===================================================== diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 66c6c63..6fe75ad 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -88,8 +88,7 @@ void ARMException::endFunction(const MachineFunction *) { Asm->getFunctionNumber())); if (!MMI->getLandingPads().empty()) { // Emit references to personality.
- if (const Function * Personality = - MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + if (const Function *Personality = MMI->getPersonality()) { MCSymbol *PerSym = Asm->getSymbol(Personality); Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); ATS.emitPersonality(PerSym); diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk index cb8e96a..0ce457f 100644 --- a/lib/CodeGen/AsmPrinter/Android.mk +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -13,6 +13,7 @@ codegen_asmprinter_SRC_FILES := \ DwarfCFIException.cpp \ DwarfCompileUnit.cpp \ DwarfDebug.cpp \ + DwarfExpression.cpp \ DwarfFile.cpp \ DwarfStringPool.cpp \ DwarfUnit.cpp \ diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 8a32713..988381d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -41,9 +41,11 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -98,15 +100,17 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, return NumBits; } -AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) +AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), - MII(tm.getSubtargetImpl()->getInstrInfo()), - OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr), - LastFn(0), Counter(~0U), SetCounter(0) { - DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; + OutContext(Streamer->getContext()), OutStreamer(*Streamer.release()), + LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) { + DD = nullptr; + MMI = nullptr; + LI = nullptr; + MF = nullptr; CurrentFnSym = CurrentFnSymForSize = nullptr; GCMetadataPrinters = nullptr; - VerboseAsm = Streamer.isVerboseAsm(); + VerboseAsm = OutStreamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { @@ -129,16 +133,17 @@ unsigned AsmPrinter::getFunctionNumber() const { } const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { - return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering(); + return *TM.getObjFileLowering(); } /// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getSubtargetImpl()->getDataLayout(); + return *TM.getDataLayout(); } const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { - return TM.getSubtarget<MCSubtargetInfo>(); + assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); + return MF->getSubtarget<MCSubtargetInfo>(); } void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { @@ -175,7 +180,7 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer.InitSections(false); - Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); + Mang = new Mangler(TM.getDataLayout()); // Emit the version-min deployment target directive if needed.
// @@ -210,7 +215,7 @@ bool AsmPrinter::doInitialization(Module &M) { assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (auto &I : *MI) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) - MP->beginAssembly(*this); + MP->beginAssembly(M, *MI, *this); // Emit module-level inline asm if it exists. if (!M.getModuleInlineAsm().empty()) { @@ -222,12 +227,25 @@ bool AsmPrinter::doInitialization(Module &M) { } if (MAI->doesSupportDebugInformation()) { - if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) + bool skip_dwarf = false; + if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) { Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), DbgTimerName, CodeViewLineTablesGroupName)); - DD = new DwarfDebug(this, &M); - Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + // FIXME: Don't emit DWARF debug info if there's at least one function + // with AddressSanitizer instrumentation. + // This is a band-aid fix for PR22032. + for (auto &F : M.functions()) { + if (F.hasFnAttribute(Attribute::SanitizeAddress)) { + skip_dwarf = true; + break; + } + } + } + if (!skip_dwarf) { + DD = new DwarfDebug(this, &M); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + } } EHStreamer *ES = nullptr; @@ -241,7 +259,7 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::ARM: ES = new ARMException(this); break; - case ExceptionHandling::ItaniumWinEH: + case ExceptionHandling::WinEH: switch (MAI->getWinEHEncodingType()) { default: llvm_unreachable("unsupported unwinding information encoding"); case WinEH::EncodingType::Itanium: @@ -323,6 +341,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (EmitSpecialLLVMGlobal(GV)) return; + // Skip the emission of global equivalents. The symbol can be emitted later + // on by emitGlobalGOTEquivs in case it turns out to be needed. + if (GlobalGOTEquivs.count(getSymbol(GV))) + return; + if (isVerbose()) { GV->printAsOperand(OutStreamer.GetCommentOS(), /*PrintType=*/false, GV->getParent()); @@ -336,12 +359,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (!GV->hasInitializer()) // External globals require no extra code. return; + GVSym->redefineIfPossible(); + if (GVSym->isDefined() || GVSym->isVariable()) + report_fatal_error("symbol '" + Twine(GVSym->getName()) + + "' is already defined"); + if (MAI->hasDotTypeDotSizeDirective()) OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global @@ -508,6 +536,10 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.GetCommentOS() << '\n'; } + // Emit the prefix data. + if (F->hasPrefixData()) + EmitGlobalConstant(F->getPrefixData()); + // Emit the CurrentFnSym. This is a virtual function to allow targets to // do their wild and crazy things as required. EmitFunctionEntryLabel(); @@ -528,27 +560,32 @@ void AsmPrinter::EmitFunctionHeader() { HI.Handler->beginFunction(MF); } - // Emit the prefix data. - if (F->hasPrefixData()) - EmitGlobalConstant(F->getPrefixData()); + // Emit the prologue data. + if (F->hasPrologueData()) + EmitGlobalConstant(F->getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the /// function. 
This can be overridden by targets as required to do custom stuff. void AsmPrinter::EmitFunctionEntryLabel() { + CurrentFnSym->redefineIfPossible(); + // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. - if (CurrentFnSym->isUndefined()) - return OutStreamer.EmitLabel(CurrentFnSym); + if (CurrentFnSym->isVariable()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' is a protected alias"); + if (CurrentFnSym->isDefined()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); - report_fatal_error("'" + Twine(CurrentFnSym->getName()) + - "' label emitted multiple times to assembly file"); + return OutStreamer.EmitLabel(CurrentFnSym); } /// emitComments - Pretty-print comments for instructions. static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); - const TargetMachine &TM = MF->getTarget(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Check for spills and reloads int FI; @@ -558,24 +595,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; - if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI, - FI)) { + if (TII->isLoadFromStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } - } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot( - &MI, MMO, FI)) { + } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; - } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE( - &MI, FI)) { + } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } - } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot( - &MI, MMO, FI)) { + } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -589,9 +622,8 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer.AddComment( - Twine("implicit-def: ") + - TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); + OutStreamer.AddComment(Twine("implicit-def: ") + + MMI->getContext().getRegisterInfo()->getName(RegNo)); OutStreamer.AddBlankLine(); } @@ -601,7 +633,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); Str += ' '; - Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg()); + Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg()); Str += (Op.isDef() ? 
"<def>" : "<kill>"); } AP.OutStreamer.AddComment(Str); @@ -629,9 +661,9 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << V.getName(); DIExpression Expr = MI->getDebugExpression(); - if (Expr.isVariablePiece()) - OS << " [piece offset=" << Expr.getPieceOffset() - << " size=" << Expr.getPieceSize() << "]"; + if (Expr.isBitPiece()) + OS << " [bit_piece offset=" << Expr.getBitPieceOffset() + << " size=" << Expr.getBitPieceSize() << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. @@ -663,8 +695,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { Reg = MI->getOperand(0).getReg(); } else { assert(MI->getOperand(0).isFI() && "Unknown operand type"); - const TargetFrameLowering *TFI = - AP.TM.getSubtargetImpl()->getFrameLowering(); + const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); Deref = true; @@ -678,7 +709,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (Deref) OS << '['; - OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg); + OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg); } if (Deref) @@ -701,8 +732,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { } bool AsmPrinter::needsSEHMoves() { - return MAI->getExceptionHandlingType() == ExceptionHandling::ItaniumWinEH && - MF->getFunction()->needsUnwindTableEntry(); + return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry(); } void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { @@ -721,6 +751,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { emitCFIInstruction(CFI); } +void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { + // The operands are the MCSymbol and the frame offset of the allocation. + MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol(); + int FrameOffset = MI.getOperand(1).getImm(); + + // Emit a symbol assignment. + OutStreamer.EmitAssignment(FrameAllocSym, + MCConstantExpr::Create(FrameOffset, OutContext)); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { @@ -759,6 +799,10 @@ void AsmPrinter::EmitFunctionBody() { emitCFIInstruction(MI); break; + case TargetOpcode::FRAME_ALLOC: + emitFrameAlloc(MI); + break; + case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol()); @@ -800,7 +844,7 @@ void AsmPrinter::EmitFunctionBody() { // labels from collapsing together. Just emit a noop. if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { MCInst Noop; - TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop); + MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); OutStreamer.AddComment("avoids zero-length function"); // Targets can opt-out of emitting the noop here by leaving the opcode @@ -852,13 +896,95 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP); +/// \brief Compute the number of Global Variables that uses a Constant. 
+static unsigned getNumGlobalVariableUses(const Constant *C) { + if (!C) + return 0; + + if (isa<GlobalVariable>(C)) + return 1; + + unsigned NumUses = 0; + for (auto *CU : C->users()) + NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); + + return NumUses; +} + +/// \brief Only consider global GOT equivalents if at least one user is a +/// cstexpr inside an initializer of another global variable. Also, don't +/// handle cstexpr inside instructions. During global variable emission, +/// candidates are skipped and are emitted later in case at least one cstexpr +/// isn't replaced by a PC relative GOT entry access. +static bool isGOTEquivalentCandidate(const GlobalVariable *GV, + unsigned &NumGOTEquivUsers) { + // Global GOT equivalents are unnamed private globals with a constant + // pointer initializer to another global symbol. They must point to a + // GlobalVariable or Function, i.e., as GlobalValue. + if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || + !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0))) + return false; + + // To be a GOT equivalent, at least one of its users needs to be a constant + // expression used by another global variable. + for (auto *U : GV->users()) + NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U)); + + return NumGOTEquivUsers > 0; +} + +/// \brief An unnamed constant global variable solely containing a pointer to +/// another global variable is equivalent to a GOT table entry; it contains +/// the address of another symbol. Optimize it and replace accesses to these +/// "GOT equivalents" by using the GOT entry for the final global instead. +/// Compute GOT equivalent candidates among all global variables to avoid +/// emitting them if possible later on, after their use is replaced by a GOT +/// entry access. +void AsmPrinter::computeGlobalGOTEquivs(Module &M) { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + for (const auto &G : M.globals()) { + unsigned NumGOTEquivUsers = 0; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + continue; + + const MCSymbol *GOTEquivSym = getSymbol(&G); + GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); + } +} + +/// \brief Constant expressions using GOT equivalent globals may not be +/// eligible for PC relative GOT entry conversion; in such cases we need to +/// emit the globals we previously omitted in EmitGlobalVariable. +void AsmPrinter::emitGlobalGOTEquivs() { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + while (!GlobalGOTEquivs.empty()) { + DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I = + GlobalGOTEquivs.begin(); + const MCSymbol *S = I->first; + const GlobalVariable *GV = I->second.first; + GlobalGOTEquivs.erase(S); + EmitGlobalVariable(GV); + } +} bool AsmPrinter::doFinalization(Module &M) { + // Gather all GOT equivalent globals in the module. We really need two + // passes over the globals: one to compute them and another to avoid their + // emission in EmitGlobalVariable; otherwise we would not be able to handle + // cases where the GOT equivalent shows up before its use. + computeGlobalGOTEquivs(M); + // Emit global variables. for (const auto &G : M.globals()) EmitGlobalVariable(&G); + // Emit remaining GOT equivalent globals.
+ emitGlobalGOTEquivs(); + // Emit visibility info for declarations for (const Function &F : M) { if (!F.isDeclaration()) @@ -875,10 +1001,15 @@ bool AsmPrinter::doFinalization(Module &M) { JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>(); if (JITI && !JITI->getTables().empty()) { + // Since we're at the module level we can't use a function specific + // MCSubtargetInfo - instead create one with the module defaults. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString())); unsigned Arch = Triple(getTargetTriple()).getArch(); bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); + const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); MCInst TrapInst; - TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst); + TII->getTrap(TrapInst); unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment()); // Emit the right section for these functions. @@ -904,9 +1035,8 @@ bool AsmPrinter::doFinalization(Module &M) { const MCSymbolRefExpr *TargetSymRef = MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT, OutContext); - TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch( - JumpToFun, TargetSymRef); - OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo()); + TII->getUnconditionalBranch(JumpToFun, TargetSymRef); + OutStreamer.EmitInstruction(JumpToFun, *STI); ++Count; } @@ -914,7 +1044,7 @@ bool AsmPrinter::doFinalization(Module &M) { uint64_t Remaining = NextPowerOf2(Count) - Count; for (uint64_t C = 0; C < Remaining; ++C) { EmitAlignment(LogAlignment); - OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo()); + OutStreamer.EmitInstruction(TrapInst, *STI); } } @@ -974,18 +1104,34 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee(), *this)); + OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee())); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I)) - MP->finishAssembly(*this); + MP->finishAssembly(M, *MI, *this); // Emit llvm.ident metadata in an '.ident' directive. EmitModuleIdents(M); + // Emit __morestack address if needed for indirect calls. + if (MMI->usesMorestackAddr()) { + const MCSection *ReadOnlySection = + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), + /*C=*/nullptr); + OutStreamer.SwitchSection(ReadOnlySection); + + MCSymbol *AddrSymbol = + OutContext.GetOrCreateSymbol(StringRef("__morestack_addr")); + OutStreamer.EmitLabel(AddrSymbol); + + unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), + PtrSize); + } + // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. 
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); @@ -1044,7 +1190,7 @@ void AsmPrinter::EmitConstantPool() { unsigned Align = CPE.getAlignment(); SectionKind Kind = - CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout()); + CPE.getSectionKind(TM.getDataLayout()); const Constant *C = nullptr; if (!CPE.isMachineConstantPoolEntry()) @@ -1098,7 +1244,7 @@ void AsmPrinter::EmitConstantPool() { Type *Ty = CPE.getType(); Offset = NewOffset + - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty); + TM.getDataLayout()->getTypeAllocSize(Ty); OutStreamer.EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) @@ -1113,7 +1259,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getSubtarget().getDataLayout(); + const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1123,29 +1269,21 @@ void AsmPrinter::EmitJumpTableInfo() { // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. const Function *F = MF->getFunction(); - bool JTInDiffSection = false; - if (// In PIC mode, we need to emit the jump table to the same section as the - // function body itself, otherwise the label differences won't make sense. - // FIXME: Need a better predicate for this: what about custom entries? - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || - // We should also do if the section name is NULL or function is declared - // in discardable section - // FIXME: this isn't the right predicate, should be based on the MCSection - // for the function. - F->isWeakForLinker()) { - OutStreamer.SwitchSection( - getObjFileLowering().SectionForGlobal(F, *Mang, TM)); + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( + MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, + *F); + if (!JTInDiffSection) { + OutStreamer.SwitchSection(TLOF.SectionForGlobal(F, *Mang, TM)); } else { // Otherwise, drop it in the readonly section. const MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), - /*C=*/nullptr); + TLOF.getSectionForJumpTable(*F, *Mang, TM); OutStreamer.SwitchSection(ReadOnlySection); - JTInDiffSection = true; } EmitAlignment(Log2_32( - MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout()))); + MJTI->getEntryAlignment(*TM.getDataLayout()))); // Jump tables in code sections are marked with a data_region directive // where that's supported. 
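The section-placement predicate deleted above now lives behind the TargetLoweringObjectFile hook queried here (its name and signature are visible in the new call). A minimal sketch of such a hook, assuming it simply mirrors the removed checks; the in-tree default may differ in details:

bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
    bool UsesLabelDifference, const Function &F) const {
  // In PIC mode, label-difference entries only make sense when the jump
  // table is emitted in the same section as the function body.
  if (UsesLabelDifference)
    return true;
  // If the linker may discard the function, keep the table next to it so
  // both are dropped together.
  return F.isWeakForLinker();
}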
@@ -1163,7 +1301,7 @@ void AsmPrinter::EmitJumpTableInfo() { if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && MAI->doesSetDirectiveSuppressesReloc()) { SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; @@ -1207,9 +1345,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: - Value = - TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry( - MJTI, MBB, UID, OutContext); + Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry( + MJTI, MBB, UID, OutContext); break; case MachineJumpTableInfo::EK_BlockAddress: // EK_BlockAddress - Each entry is a plain address of block, e.g.: @@ -1248,7 +1385,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, break; } Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); Value = MCBinaryExpr::CreateSub(Value, Base, OutContext); break; @@ -1258,7 +1395,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); unsigned EntrySize = - MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout()); + MJTI->getEntrySize(*TM.getDataLayout()); OutStreamer.EmitValue(Value, EntrySize); } @@ -1368,7 +1505,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); unsigned Align = Log2_32(DL->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, @@ -1483,25 +1620,26 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(), + NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. + assert(NumBits < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); if (getCurrentSection()->getKind().isText()) - OutStreamer.EmitCodeAlignment(1 << NumBits); + OutStreamer.EmitCodeAlignment(1u << NumBits); else - OutStreamer.EmitValueToAlignment(1 << NumBits); + OutStreamer.EmitValueToAlignment(1u << NumBits); } //===----------------------------------------------------------------------===// // Constant emission. //===----------------------------------------------------------------------===// -/// lowerConstant - Lower the specified LLVM Constant to an MCExpr. 
-/// -static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { - MCContext &Ctx = AP.OutContext; +const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { + MCContext &Ctx = OutContext; if (CV->isNullValue() || isa<UndefValue>(CV)) return MCConstantExpr::Create(0, Ctx); @@ -1510,19 +1648,18 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return MCConstantExpr::Create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); + return MCSymbolRefExpr::Create(getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) - return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); + return MCSymbolRefExpr::Create(GetBlockAddressSymbol(BA), Ctx); const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (!CE) { llvm_unreachable("Unknown constant value to lower!"); } - if (const MCExpr *RelocExpr = - AP.getObjFileLowering().getExecutableRelativeSymbol(CE, *AP.Mang, - AP.TM)) + if (const MCExpr *RelocExpr + = getObjFileLowering().getExecutableRelativeSymbol(CE, *Mang, TM)) return RelocExpr; switch (CE->getOpcode()) { @@ -1531,9 +1668,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. if (Constant *C = ConstantFoldConstantExpression( - CE, AP.TM.getSubtargetImpl()->getDataLayout())) + CE, TM.getDataLayout())) if (C != CE) - return lowerConstant(C, AP); + return lowerConstant(C); // Otherwise report the problem to the user. { @@ -1541,16 +1678,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/false, - !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); + !MF ? nullptr : MF->getFunction()->getParent()); report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); - const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); + const MCExpr *Base = lowerConstant(CE->getOperand(0)); if (!OffsetAI) return Base; @@ -1566,26 +1704,28 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // is reasonable to treat their delta as a 32-bit value. // FALL THROUGH. case Instruction::BitCast: - return lowerConstant(CE->getOperand(0), AP); + return lowerConstant(CE->getOperand(0)); case Instruction::IntToPtr: { - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), false/*ZExt*/); - return lowerConstant(Op, AP); + return lowerConstant(Op); } case Instruction::PtrToInt: { - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *TM.getDataLayout(); + // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. 
Constant *Op = CE->getOperand(0); Type *Ty = CE->getType(); - const MCExpr *OpExpr = lowerConstant(Op, AP); + const MCExpr *OpExpr = lowerConstant(Op); // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. @@ -1611,8 +1751,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP); - const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP); + const MCExpr *LHS = lowerConstant(CE->getOperand(0)); + const MCExpr *RHS = lowerConstant(CE->getOperand(1)); switch (CE->getOpcode()) { default: llvm_unreachable("Unknown binary operator constant cast expr"); case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); @@ -1629,7 +1769,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, + const Constant *BaseCV = nullptr, + uint64_t Offset = 0); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1653,7 +1795,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (CI->getBitWidth() > 64) return -1; uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType()); + TM.getDataLayout()->getTypeAllocSize(V->getType()); uint64_t Value = CI->getZExtValue(); // Make sure the constant is at least 8 bits long and has a power @@ -1698,7 +1840,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { uint64_t Bytes = - AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + AP.TM.getDataLayout()->getTypeAllocSize( CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) @@ -1749,7 +1891,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } } - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1758,20 +1900,22 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
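// Illustration (not from the patch): a [128 x i8] constant whose bytes are
// all 0x2a takes the EmitFill path below and is emitted as one fill
// directive instead of 128 individual byte directives.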
int Value = isRepeatedByteSequence(CA, AP.TM); + const DataLayout &DL = *AP.TM.getDataLayout(); if (Value != -1) { - uint64_t Bytes = - AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( - CA->getType()); + uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); AP.OutStreamer.EmitFill(Bytes, Value); } else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AP); + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } } } @@ -1779,7 +1923,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1787,24 +1931,25 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned Size = DL->getTypeAllocSize(CS->getType()); const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); + // Print the actual field value. + emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + // Check if padding is needed and insert one or more 0s. uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; - // Now print the actual field value. - emitGlobalConstantImpl(Field, AP); - // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. @@ -1839,7 +1984,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() && + if (AP.TM.getDataLayout()->isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; @@ -1858,13 +2003,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { } // Emit the tail padding for the long double. 
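// For example, on x86-64 an x86_fp80 has a store size of 10 bytes but an
// alloc size of 16, so the EmitZeros call below pads with 6 zero bytes.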
- const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -1910,7 +2055,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. - uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize( CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && @@ -1920,9 +2065,100 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { } } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); +/// \brief Replace a non-absolute MCExpr containing a reference to a GOT +/// equivalent global with a target specific GOT PC-relative access to the +/// final symbol. +static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, + const Constant *BaseCst, + uint64_t Offset) { + // The global @foo below illustrates a global that uses a GOT equivalent. + // + // @bar = global i32 42 + // @gotequiv = private unnamed_addr constant i32* @bar + // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64), + // i64 ptrtoint (i32* @foo to i64)) + // to i32) + // + // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually + // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the + // form: + // + // foo = cstexpr, where + // cstexpr := <gotequiv> - "." + <cst> + // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> + // + // After canonicalization by EvaluateAsRelocatable `ME` turns into: + // + // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where + // gotpcrelcst := <offset from @foo base> + <cst> + // + MCValue MV; + if (!(*ME)->EvaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + return; + + const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol(); + if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) + return; + + const GlobalValue *BaseGV = dyn_cast<GlobalValue>(BaseCst); + if (!BaseGV) + return; + + const MCSymbol *BaseSym = AP.getSymbol(BaseGV); + if (BaseSym != &MV.getSymB()->getSymbol()) + return; + + // Make sure to match: + // + // gotpcrelcst := <offset from @foo base> + <cst> + // + int64_t GOTPCRelCst = Offset + MV.getConstant(); + if (GOTPCRelCst < 0) + return; + + // Emit the GOT PC-relative access to replace the GOT equivalent global, i.e.: + // + // bar: + // .long 42 + // gotequiv: + // .quad bar + // foo: + // .long gotequiv - "."
+ <cst> + // + // is replaced by the target specific equivalent to: + // + // bar: + // .long 42 + // foo: + // .long bar@GOTPCREL+<gotpcrelcst> + // + AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; + const GlobalVariable *GV = Result.first; + unsigned NumUses = Result.second; + const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0)); + const MCSymbol *FinalSym = AP.getSymbol(FinalGV); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym, + GOTPCRelCst); + + // Update GOT equivalent usage information + --NumUses; + if (NumUses) + AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); + else + AP.GlobalGOTEquivs.erase(GOTEquivSym); +} + +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + const DataLayout *DL = AP.TM.getDataLayout(); uint64_t Size = DL->getTypeAllocSize(CV->getType()); + + // Globals with sub-elements such as combinations of arrays and structs + // are handled recursively by emitGlobalConstantImpl. Keep track of the + // constant symbol base and the current position with BaseCV and Offset. + if (!BaseCV && CV->hasOneUse()) + BaseCV = dyn_cast<Constant>(CV->user_back()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size); @@ -1955,10 +2191,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return emitGlobalConstantArray(CVA, AP); + return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return emitGlobalConstantStruct(CVS, AP); + return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of @@ -1981,13 +2217,21 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size); + const MCExpr *ME = AP.lowerConstant(CV); + + // Since lowerConstant already folded and got rid of all IR pointer and + // integer casts, detect GOT equivalent accesses by looking into the MCExpr + // directly. + if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) + handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); + + AP.OutStreamer.EmitValue(ME, Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = - TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); + TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { @@ -2015,16 +2259,16 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { /// GetTempSymbol - Return the MCSymbol corresponding to the assembler /// temporary label with the specified stem and unique ID. 
-MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); +MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name, unsigned ID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + Name + Twine(ID)); } /// GetTempSymbol - Return an assembler temporary label with the specified /// stem. -MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); +MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Name); } @@ -2040,7 +2284,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); @@ -2054,7 +2298,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); @@ -2252,6 +2496,11 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { if (!S.usesMetadata()) return nullptr; + assert(!S.useStatepoints() && "statepoints do not currently support custom" + " stackmap formats, please see the documentation for a description of" + " the default format. If you really need a custom serialized format," + " please file a bug"); + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); gcp_map_type::iterator GCPI = GCMap.find(&S); if (GCPI != GCMap.end()) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 05f6a68..d0958c1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,26 +12,44 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/ADT/SmallBitVector.h" +#include "DwarfExpression.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? 
Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -131,7 +149,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + return TM.getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -187,57 +205,6 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, EmitLabelDifference(Label, SectionLabel, 4); } -/// Emit a dwarf register operation. -static void emitDwarfRegOp(ByteStreamer &Streamer, int Reg) { - assert(Reg >= 0); - if (Reg < 32) { - Streamer.EmitInt8(dwarf::DW_OP_reg0 + Reg, - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - } else { - Streamer.EmitInt8(dwarf::DW_OP_regx, "DW_OP_regx"); - Streamer.EmitULEB128(Reg, Twine(Reg)); - } -} - -/// Emit an (double-)indirect dwarf register operation. -static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset, - bool Deref) { - assert(Reg >= 0); - if (Reg < 32) { - Streamer.EmitInt8(dwarf::DW_OP_breg0 + Reg, - dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); - } else { - Streamer.EmitInt8(dwarf::DW_OP_bregx, "DW_OP_bregx"); - Streamer.EmitULEB128(Reg, Twine(Reg)); - } - Streamer.EmitSLEB128(Offset); - if (Deref) - Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); -} - -void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits, - unsigned OffsetInBits) const { - assert(SizeInBits > 0 && "piece has size zero"); - const unsigned SizeOfByte = 8; - if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { - Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece"); - Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits)); - Streamer.EmitULEB128(OffsetInBits, Twine(OffsetInBits)); - } else { - Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece"); - unsigned ByteSize = SizeInBits / SizeOfByte; - Streamer.EmitULEB128(ByteSize, Twine(ByteSize)); - } -} - -/// Emit a shift-right dwarf expression. -static void emitDwarfOpShr(ByteStreamer &Streamer, - unsigned ShiftBy) { - Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); - Streamer.EmitULEB128(ShiftBy); - Streamer.EmitInt8(dwarf::DW_OP_shr, "DW_OP_shr"); -} - // Some targets do not provide a DWARF register number for every // register. This function attempts to emit a DWARF register by // emitting a piece of a super-register or by piecing together @@ -247,112 +214,44 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) const { assert(MLoc.isReg() && "MLoc must be a register"); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - - // If this is a valid register number, emit it. 
- if (Reg >= 0) { - emitDwarfRegOp(Streamer, Reg); - EmitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits); - return; - } - - // Walk up the super-register chain until we find a valid number. - // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. - for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); - if (Reg >= 0) { - unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg()); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); - OutStreamer.AddComment("super-register"); - emitDwarfRegOp(Streamer, Reg); - if (PieceOffsetInBits == RegOffset) { - EmitDwarfOpPiece(Streamer, Size, RegOffset); - } else { - // If this is part of a variable in a sub-register at a - // non-zero offset, we need to manually shift the value into - // place, since the DW_OP_piece describes the part of the - // variable, not the position of the subregister. - if (RegOffset) - emitDwarfOpShr(Streamer, RegOffset); - EmitDwarfOpPiece(Streamer, Size, PieceOffsetInBits); - } - return; - } - } - - // Otherwise, attempt to find a covering set of sub-register numbers. - // For example, Q0 on ARM is a composition of D0+D1. - // - // Keep track of the current position so we can emit the more - // efficient DW_OP_piece. - unsigned CurPos = PieceOffsetInBits; - // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8; - // Keep track of the bits in the register we already emitted, so we - // can avoid emitting redundant aliasing subregs. - SmallBitVector Coverage(RegSize, false); - for (MCSubRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { - unsigned Idx = TRI->getSubRegIndex(MLoc.getReg(), *SR); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - Reg = TRI->getDwarfRegNum(*SR, false); - - // Intersection between the bits we already emitted and the bits - // covered by this subregister. - SmallBitVector Intersection(RegSize, false); - Intersection.set(Offset, Offset + Size); - Intersection ^= Coverage; - - // If this sub-register has a DWARF number and we haven't covered - // its range, emit a DWARF piece for it. - if (Reg >= 0 && Intersection.any()) { - OutStreamer.AddComment("sub-register"); - emitDwarfRegOp(Streamer, Reg); - EmitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); - CurPos = Offset + Size; - - // Mark it as emitted. - Coverage.set(Offset, Offset + Size); - } - } + DebugLocDwarfExpression Expr(*this, Streamer); + Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits); +} - if (CurPos == PieceOffsetInBits) { - // FIXME: We have no reasonable way of handling errors in here. - Streamer.EmitInt8(dwarf::DW_OP_nop, - "nop (could not find a dwarf register number)"); - } +void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) const { + DebugLocDwarfExpression Expr(*this, Streamer); + Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits); } /// EmitDwarfRegOp - Emit dwarf register operation. 
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, - const MachineLocation &MLoc, - bool Indirect) const { - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + const MachineLocation &MLoc) const { + DebugLocDwarfExpression Expr(*this, Streamer); + const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); + int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { // We assume that pointers are always in an addressable register. - if (Indirect || MLoc.isIndirect()) { + if (MLoc.isIndirect()) // FIXME: We have no reasonable way of handling errors in here. The // caller might be in the middle of a dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more // mature. - Streamer.EmitInt8(dwarf::DW_OP_nop, - "nop (invalid dwarf register number for indirect loc)"); - return; - } + return Expr.EmitOp(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); // Attempt to find a valid super- or sub-register. - return EmitDwarfRegOpPiece(Streamer, MLoc); + if (!Expr.AddMachineRegPiece(MLoc.getReg())) + Expr.EmitOp(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); + return; } if (MLoc.isIndirect()) - emitDwarfRegOpIndirect(Streamer, Reg, MLoc.getOffset(), Indirect); - else if (Indirect) - emitDwarfRegOpIndirect(Streamer, Reg, 0, false); + Expr.AddRegIndirect(Reg, MLoc.getOffset()); else - emitDwarfRegOp(Streamer, Reg); + Expr.AddReg(Reg); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index cca5f22..e6e7c97 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -64,7 +65,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { if (LocInfo->getNumOperands() != 0) if (const ConstantInt *CI = - dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine))) + mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine))) LocCookie = CI->getZExtValue(); } @@ -90,8 +91,19 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, assert(MCAI && "No MCAsmInfo"); if (!MCAI->useIntegratedAssembler() && !OutStreamer.isIntegratedAssemblerRequired()) { + emitInlineAsmStart(); OutStreamer.EmitRawText(Str); - emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr); + // If we have a machine function then grab the MCSubtarget off of that, + // otherwise we're at the module level and want to construct one from + // the default CPU and target triple. + if (MF) { + emitInlineAsmEnd(MF->getSubtarget<MCSubtargetInfo>(), nullptr); + } else { + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple(), TM.getTargetCPU(), + TM.getTargetFeatureString())); + emitInlineAsmEnd(*STI, nullptr); + } return; } @@ -137,11 +149,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // emitInlineAsmEnd(). 
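// (Sketch of the snapshot-and-hand-back pattern used below; illustration
// only, assuming STI is this function's owning MCSubtargetInfo pointer. The
// asm parser may mutate *STI, e.g. through mode-switching directives, so the
// target hook receives both the pre-parse and post-parse states and can emit
// whatever it needs to restore the original mode.)
MCSubtargetInfo Snapshot = *STI;       // state before the asm parser runs
Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true);
emitInlineAsmEnd(Snapshot, STI.get()); // original state, then final state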
MCSubtargetInfo STIOrig = *STI; - MCTargetOptions MCOptions; - if (MF) - MCOptions = MF->getTarget().Options.MCOptions; - std::unique_ptr<MCTargetAsmParser> TAP( - TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions)); + // We create a new MCInstrInfo here since we might be at the module level + // and not have a MachineFunction to initialize the TargetInstrInfo from and + // we only need MCInstrInfo for asm parsing. We create one unconditionally + // because it's not subtarget dependent. + std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); + std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( + *STI, *Parser, *MII, TM.Options.MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -152,6 +166,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); } + emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); @@ -467,7 +482,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (MI->getOperand(i-1).isMetadata() && (LocMD = MI->getOperand(i-1).getMetadata()) && LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { LocCookie = CI->getZExtValue(); break; } @@ -505,7 +521,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { @@ -566,5 +582,7 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } +void AsmPrinter::emitInlineAsmStart() const {} + void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const {} diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 0cc8353..42be114 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -15,10 +15,10 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H #define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H +#include "DIEHash.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" -#include "DIEHash.h" namespace llvm { class ByteStreamer { diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index e6b7d64..01d2c72 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_library(LLVMAsmPrinter DwarfCFIException.cpp DwarfCompileUnit.cpp DwarfDebug.cpp + DwarfExpression.cpp DwarfFile.cpp DwarfStringPool.cpp DwarfUnit.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 50ea369..64ba56b 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -11,8 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "DIE.h" - +#include "llvm/CodeGen/DIE.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include 
"DwarfUnit.h" diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h deleted file mode 100644 index e310aef..0000000 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ /dev/null @@ -1,587 +0,0 @@ -//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Data structures for DWARF info entries. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H - -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Dwarf.h" -#include <vector> - -namespace llvm { -class AsmPrinter; -class MCExpr; -class MCSymbol; -class raw_ostream; -class DwarfTypeUnit; - -//===--------------------------------------------------------------------===// -/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a -/// Dwarf abbreviation. -class DIEAbbrevData { - /// Attribute - Dwarf attribute code. - /// - dwarf::Attribute Attribute; - - /// Form - Dwarf form code. - /// - dwarf::Form Form; - -public: - DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} - - // Accessors. - dwarf::Attribute getAttribute() const { return Attribute; } - dwarf::Form getForm() const { return Form; } - - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; -}; - -//===--------------------------------------------------------------------===// -/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug -/// information object. -class DIEAbbrev : public FoldingSetNode { - /// Unique number for node. - /// - unsigned Number; - - /// Tag - Dwarf tag code. - /// - dwarf::Tag Tag; - - /// Children - Whether or not this node has children. - /// - // This cheats a bit in all of the uses since the values in the standard - // are 0 and 1 for no children and children respectively. - bool Children; - - /// Data - Raw data bytes for abbreviation. - /// - SmallVector<DIEAbbrevData, 12> Data; - -public: - DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {} - - // Accessors. - dwarf::Tag getTag() const { return Tag; } - unsigned getNumber() const { return Number; } - bool hasChildren() const { return Children; } - const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setChildrenFlag(bool hasChild) { Children = hasChild; } - void setNumber(unsigned N) { Number = N; } - - /// AddAttribute - Adds another set of attribute information to the - /// abbreviation. - void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { - Data.push_back(DIEAbbrevData(Attribute, Form)); - } - - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; - - /// Emit - Print the abbreviation using the specified asm printer. - /// - void Emit(AsmPrinter *AP) const; - -#ifndef NDEBUG - void print(raw_ostream &O); - void dump(); -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIE - A structured debug information entry. Has an abbreviation which -/// describes its organization. -class DIEValue; - -class DIE { -protected: - /// Offset - Offset in debug info section. 
-  ///
-  unsigned Offset;
-
-  /// Size - Size of instance + children.
-  ///
-  unsigned Size;
-
-  /// Abbrev - Buffer for constructing abbreviation.
-  ///
-  DIEAbbrev Abbrev;
-
-  /// Children DIEs.
-  ///
-  // This can't be a vector<DIE> because pointer validity is required for the
-  // Parent pointer and DIEEntry.
-  // It can't be a list<DIE> because some clients need pointer validity before
-  // the object has been added to any child list
-  // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
-  // be more convoluted than beneficial.
-  std::vector<std::unique_ptr<DIE>> Children;
-
-  DIE *Parent;
-
-  /// Attribute values.
-  ///
-  SmallVector<DIEValue *, 12> Values;
-
-protected:
-  DIE()
-      : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no),
-        Parent(nullptr) {}
-
-public:
-  explicit DIE(dwarf::Tag Tag)
-      : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
-        Parent(nullptr) {}
-
-  // Accessors.
-  DIEAbbrev &getAbbrev() { return Abbrev; }
-  const DIEAbbrev &getAbbrev() const { return Abbrev; }
-  unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
-  dwarf::Tag getTag() const { return Abbrev.getTag(); }
-  unsigned getOffset() const { return Offset; }
-  unsigned getSize() const { return Size; }
-  const std::vector<std::unique_ptr<DIE>> &getChildren() const {
-    return Children;
-  }
-  const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
-  DIE *getParent() const { return Parent; }
-  /// Climb up the parent chain to get the compile or type unit DIE this DIE
-  /// belongs to.
-  const DIE *getUnit() const;
-  /// Similar to getUnit, returns null when DIE is not added to an
-  /// owner yet.
-  const DIE *getUnitOrNull() const;
-  void setOffset(unsigned O) { Offset = O; }
-  void setSize(unsigned S) { Size = S; }
-
-  /// addValue - Add a value and attributes to a DIE.
-  ///
-  void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) {
-    Abbrev.AddAttribute(Attribute, Form);
-    Values.push_back(Value);
-  }
-
-  /// addChild - Add a child to the DIE.
-  ///
-  void addChild(std::unique_ptr<DIE> Child) {
-    assert(!Child->getParent());
-    Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
-    Child->Parent = this;
-    Children.push_back(std::move(Child));
-  }
-
-  /// findAttribute - Find a value in the DIE with the attribute given,
-  /// returns NULL if no such attribute exists.
-  DIEValue *findAttribute(dwarf::Attribute Attribute) const;
-
-#ifndef NDEBUG
-  void print(raw_ostream &O, unsigned IndentCount = 0) const;
-  void dump();
-#endif
-};
-
-//===--------------------------------------------------------------------===//
-/// DIEValue - A debug information entry value. Some of these roughly correlate
-/// to DWARF attribute classes.
-///
-class DIEValue {
-  virtual void anchor();
-
-public:
-  enum Type {
-    isInteger,
-    isString,
-    isExpr,
-    isLabel,
-    isDelta,
-    isEntry,
-    isTypeSignature,
-    isBlock,
-    isLoc,
-    isLocList,
-  };
-
-protected:
-  /// Ty - Type of data stored in the value.
-  ///
-  Type Ty;
-
-  explicit DIEValue(Type T) : Ty(T) {}
-  virtual ~DIEValue() {}
-
-public:
-  // Accessors
-  Type getType() const { return Ty; }
-
-  /// EmitValue - Emit value via the Dwarf writer.
-  ///
-  virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
-
-  /// SizeOf - Return the size of a value in bytes.
- /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; - -#ifndef NDEBUG - virtual void print(raw_ostream &O) const = 0; - void dump() const; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEInteger - An integer value DIE. -/// -class DIEInteger : public DIEValue { - uint64_t Integer; - -public: - explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} - - /// BestForm - Choose the best form for integer. - /// - static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { - if (IsSigned) { - const int64_t SignedInt = Int; - if ((char)Int == SignedInt) - return dwarf::DW_FORM_data1; - if ((short)Int == SignedInt) - return dwarf::DW_FORM_data2; - if ((int)Int == SignedInt) - return dwarf::DW_FORM_data4; - } else { - if ((unsigned char)Int == Int) - return dwarf::DW_FORM_data1; - if ((unsigned short)Int == Int) - return dwarf::DW_FORM_data2; - if ((unsigned int)Int == Int) - return dwarf::DW_FORM_data4; - } - return dwarf::DW_FORM_data8; - } - - /// EmitValue - Emit integer of appropriate size. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - uint64_t getValue() const { return Integer; } - - /// SizeOf - Determine size of integer value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *I) { return I->getType() == isInteger; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEExpr - An expression DIE. -// -class DIEExpr : public DIEValue { - const MCExpr *Expr; - -public: - explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} - - /// EmitValue - Emit expression value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// getValue - Get MCExpr. - /// - const MCExpr *getValue() const { return Expr; } - - /// SizeOf - Determine size of expression value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isExpr; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIELabel - A label DIE. -// -class DIELabel : public DIEValue { - const MCSymbol *Label; - -public: - explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} - - /// EmitValue - Emit label value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// getValue - Get MCSymbol. - /// - const MCSymbol *getValue() const { return Label; } - - /// SizeOf - Determine size of label value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *L) { return L->getType() == isLabel; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEDelta - A simple label difference DIE. -/// -class DIEDelta : public DIEValue { - const MCSymbol *LabelHi; - const MCSymbol *LabelLo; - -public: - DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) - : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} - - /// EmitValue - Emit delta value. 
- /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of delta value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isDelta; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEString - A container for string values. -/// -class DIEString : public DIEValue { - const DIEValue *Access; - StringRef Str; - -public: - DIEString(const DIEValue *Acc, StringRef S) - : DIEValue(isString), Access(Acc), Str(S) {} - - /// getString - Grab the string out of the object. - StringRef getString() const { return Str; } - - /// EmitValue - Emit delta value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of delta value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isString; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEEntry - A pointer to another debug information entry. An instance of -/// this class can also be used as a proxy for a debug information entry not -/// yet defined (ie. types.) -class DIEEntry : public DIEValue { - DIE &Entry; - -public: - explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) { - } - - DIE &getEntry() const { return Entry; } - - /// EmitValue - Emit debug information entry offset. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of debug information entry in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { - return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) - : sizeof(int32_t); - } - - /// Returns size of a ref_addr entry. - static unsigned getRefAddrSize(AsmPrinter *AP); - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isEntry; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// \brief A signature reference to a type unit. -class DIETypeSignature : public DIEValue { - const DwarfTypeUnit &Unit; - -public: - explicit DIETypeSignature(const DwarfTypeUnit &Unit) - : DIEValue(isTypeSignature), Unit(Unit) {} - - /// \brief Emit type unit signature. - void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override; - - /// Returns size of a ref_sig8 entry. - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { - assert(Form == dwarf::DW_FORM_ref_sig8); - return 8; - } - - // \brief Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { - return E->getType() == isTypeSignature; - } -#ifndef NDEBUG - void print(raw_ostream &O) const override; - void dump() const; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIELoc - Represents an expression location. -// -class DIELoc : public DIEValue, public DIE { - mutable unsigned Size; // Size in bytes excluding size header. -public: - DIELoc() : DIEValue(isLoc), Size(0) {} - - /// ComputeSize - Calculate the size of the location expression. 
- /// - unsigned ComputeSize(AsmPrinter *AP) const; - - /// BestForm - Choose the best form for data. - /// - dwarf::Form BestForm(unsigned DwarfVersion) const { - if (DwarfVersion > 3) - return dwarf::DW_FORM_exprloc; - // Pre-DWARF4 location expressions were blocks and not exprloc. - if ((unsigned char)Size == Size) - return dwarf::DW_FORM_block1; - if ((unsigned short)Size == Size) - return dwarf::DW_FORM_block2; - if ((unsigned int)Size == Size) - return dwarf::DW_FORM_block4; - return dwarf::DW_FORM_block; - } - - /// EmitValue - Emit location data. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of location data in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isLoc; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEBlock - Represents a block of values. -// -class DIEBlock : public DIEValue, public DIE { - mutable unsigned Size; // Size in bytes excluding size header. -public: - DIEBlock() : DIEValue(isBlock), Size(0) {} - - /// ComputeSize - Calculate the size of the location expression. - /// - unsigned ComputeSize(AsmPrinter *AP) const; - - /// BestForm - Choose the best form for data. - /// - dwarf::Form BestForm() const { - if ((unsigned char)Size == Size) - return dwarf::DW_FORM_block1; - if ((unsigned short)Size == Size) - return dwarf::DW_FORM_block2; - if ((unsigned int)Size == Size) - return dwarf::DW_FORM_block4; - return dwarf::DW_FORM_block; - } - - /// EmitValue - Emit location data. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of location data in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isBlock; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIELocList - Represents a pointer to a location list in the debug_loc -/// section. -// -class DIELocList : public DIEValue { - // Index into the .debug_loc vector. - size_t Index; - -public: - DIELocList(size_t I) : DIEValue(isLocList), Index(I) {} - - /// getValue - Grab the current index out. - size_t getValue() const { return Index; } - - /// EmitValue - Emit location data. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of location data in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. 
- static bool classof(const DIEValue *E) { return E->getType() == isLocList; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -} // end llvm namespace - -#endif diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index b2a3ba8..1e2ba2c 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -13,11 +13,11 @@ #include "ByteStreamer.h" #include "DIEHash.h" -#include "DIE.h" #include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index 872aa0e..ac014b7 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H -#include "DIE.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/Support/MD5.h" namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 6cca985..6d55c03 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -11,8 +11,8 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" namespace llvm { class MDNode; @@ -74,7 +74,7 @@ public: MachineLocation getLoc() const { return Loc; } const MDNode *getVariableNode() const { return Variable; } DIVariable getVariable() const { return DIVariable(Variable); } - bool isVariablePiece() const { return getExpression().isVariablePiece(); } + bool isBitPiece() const { return getExpression().isBitPiece(); } DIExpression getExpression() const { return DIExpression(Expression); } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); @@ -101,8 +101,8 @@ public: DIVariable Var(Values[0].Variable); DIExpression NextExpr(Next.Values[0].Expression); DIVariable NextVar(Next.Values[0].Variable); - if (Var == NextVar && Expr.isVariablePiece() && - NextExpr.isVariablePiece()) { + if (Var == NextVar && Expr.isBitPiece() && + NextExpr.isBitPiece()) { addValues(Next.Values); End = Next.End; return true; @@ -131,7 +131,7 @@ public: Values.append(Vals.begin(), Vals.end()); sortUniqueValues(); assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ - return V.isVariablePiece(); + return V.isBitPiece(); }) && "value must be a piece"); } @@ -176,8 +176,8 @@ inline bool operator==(const DebugLocEntry::Value &A, /// Compare two pieces based on their offset. 
inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression().getPieceOffset() < - B.getExpression().getPieceOffset(); + return A.getExpression().getBitPieceOffset() < + B.getExpression().getBitPieceOffset(); } } diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h index 2a4f58f..0f1d2ed 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocList.h +++ b/lib/CodeGen/AsmPrinter/DebugLocList.h @@ -10,8 +10,8 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H -#include "llvm/ADT/SmallVector.h" #include "DebugLocEntry.h" +#include "llvm/ADT/SmallVector.h" namespace llvm { class DwarfCompileUnit; diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 7e87566..a71f35e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "DwarfAccelTable.h" -#include "DIE.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 3cdf678..74963da 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H -#include "DIE.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" @@ -215,8 +215,8 @@ private: #endif }; - DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) = delete; + void operator=(const DwarfAccelTable &) = delete; // Internal Functions void EmitHeader(AsmPrinter *); diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 0dc52da..f45b24c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -51,7 +51,8 @@ void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) Asm->OutStreamer.EmitCFISections(false, true); - if (!Asm->MAI->usesItaniumLSDAForExceptions()) + // SjLj uses this pass and it doesn't need this info. 
+ if (!Asm->MAI->usesCFIForEH()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -90,7 +91,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const Function *Per = MMI->getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 2f1b0e5..dcc5fe4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1,5 +1,5 @@ #include "DwarfCompileUnit.h" - +#include "DwarfExpression.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -10,8 +10,8 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" namespace llvm { @@ -103,7 +103,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { assert(GV.isGlobalVariable()); - DIScope GVContext = DD->resolve(GV.getContext()); + DIScope GVContext = GV.getContext(); DIType GTy = DD->resolve(GV.getType()); // Construct the context before querying for the existence of the DIE in @@ -122,7 +122,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); } else { - DeclContext = resolve(GV.getContext()); + DeclContext = GV.getContext(); // Add name and type. addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); addType(*VariableDIE, GTy); @@ -292,10 +292,10 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { // Only include DW_AT_frame_base in full debug info if (!includeMinimalInlineScopes()) { - const TargetRegisterInfo *RI = - Asm->TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + if (RI->isPhysicalRegister(Location.getReg())) + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); } // Add name to the name table, we do this here because we're guaranteed @@ -515,15 +515,23 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, } // .. else use frame index. 
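// (Worked example, with hypothetical numbers, of what the frame-index path
// below now produces for a 128-bit variable split across two 64-bit stack
// slots, assuming the frame register maps to DWARF register 6 and each
// DIExpression carries a DW_OP_bit_piece:
//
//   DW_OP_breg6 -8     ; address of the first slot  (AddMachineRegIndirect)
//   DW_OP_bit_piece 64 0                             (AddExpression)
//   DW_OP_breg6 -24    ; address of the second slot
//   DW_OP_bit_piece 64 64
//
// DWARF concatenates the pieces, so consumers see a single composite
// location for the variable.)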
- int FI = DV.getFrameIndex(); - if (FI != ~0) { + if (DV.getFrameIndex().back() == ~0) + return VariableDie; + + auto Expr = DV.getExpression().begin(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + for (auto FI : DV.getFrameIndex()) { unsigned FrameReg = 0; - const TargetFrameLowering *TFI = - Asm->TM.getSubtargetImpl()->getFrameLowering(); + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, *VariableDie, Location); + assert(Expr != DV.getExpression().end() && + "Wrong number of expressions"); + DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); + DwarfExpr.AddExpression(Expr->begin(), Expr->end()); + ++Expr; } + addBlock(*VariableDie, dwarf::DW_AT_location, Loc); return VariableDie; } @@ -694,7 +702,7 @@ void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); assert(DV.isVariable()); - DbgVariable NewVar(DV, DIExpression(nullptr), DD); + DbgVariable NewVar(DV, DIExpression(), DD); auto VariableDie = constructVariableDIE(NewVar); applyVariableAttributes(NewVar, *VariableDie); SPDIE->addChild(std::move(VariableDie)); @@ -736,24 +744,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location, - DV.getVariable().isIndirect()); + addAddress(Die, dwarf::DW_AT_location, Location); } /// Add an address attribute to a die based on the location provided. void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, - bool Indirect) { + const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - if (Location.isReg() && !Indirect) - addRegisterOpPiece(*Loc, Location.getReg()); - else { - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); - if (Indirect && !Location.isReg()) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } - } + bool validReg; + if (Location.isReg()) + validReg = addRegisterOpPiece(*Loc, Location.getReg()); + else + validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + + if (!validReg) + return; // Now attach the location information to the DIE. addBlock(Die, Attribute, Loc); @@ -767,53 +773,21 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (Location.isReg()) { - if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_plus) { - assert(!DV.getVariable().isIndirect() && - "double indirection not handled"); - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
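// (The distinction named in the comment above, sketched; illustration only,
// with emitOp/emitSLEB128 standing in for the streamer calls and DwarfReg
// assumed to be below 32 so the compact opcodes apply.)
static void addRegOrBregSketch(int DwarfReg, bool InMemory, int Offset) {
  if (!InMemory) {
    emitOp(dwarf::DW_OP_reg0 + DwarfReg);  // value lives in the register
  } else {
    emitOp(dwarf::DW_OP_breg0 + DwarfReg); // value lives at reg + offset
    emitSLEB128(Offset);                   // signed offset operand
  }
}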
- addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1)); - i = 2; - } else if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_deref) { - assert(!DV.getVariable().isIndirect() && - "double indirection not handled"); - addRegisterOpPiece(*Loc, Location.getReg(), - DV.getExpression().getPieceSize(), - DV.getExpression().getPieceOffset()); - i = 3; - } else - addRegisterOpPiece(*Loc, Location.getReg()); + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + assert(DV.getExpression().size() == 1); + DIExpression Expr = DV.getExpression().back(); + bool ValidReg; + if (Location.getOffset()) { + ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), + Location.getOffset()); + if (ValidReg) + DwarfExpr.AddExpression(Expr.begin(), Expr.end()); } else - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); - - for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == dwarf::DW_OP_plus) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); - - } else if (Element == dwarf::DW_OP_deref) { - if (!Location.isReg()) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - } else if (Element == dwarf::DW_OP_piece) { - const unsigned SizeOfByte = 8; - unsigned PieceOffsetInBits = DV.getAddrElement(++i) * SizeOfByte; - unsigned PieceSizeInBits = DV.getAddrElement(++i) * SizeOfByte; - // Emit DW_OP_bit_piece Size Offset. - assert(PieceSizeInBits > 0 && "piece has zero size"); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); - addUInt(*Loc, dwarf::DW_FORM_udata, PieceSizeInBits); - addUInt(*Loc, dwarf::DW_FORM_udata, PieceOffsetInBits); - } else - llvm_unreachable("unknown DIBuilder Opcode"); - } + ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); + if (ValidReg) + addBlock(Die, Attribute, Loc); } /// Add a Dwarf loclistptr attribute data and value. diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index e521f39..c66af65 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,9 +15,9 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DwarfUnit.h" -#include "llvm/Support/Dwarf.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Dwarf.h" namespace llvm { @@ -213,7 +213,7 @@ public: MachineLocation Location); /// Add an address attribute to a die based on the location provided. 
void addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect = false); + const MachineLocation &Location); /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual variable (navigating the diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 230ea46..aa1f79f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "DwarfDebug.h" - #include "ByteStreamer.h" -#include "DwarfCompileUnit.h" -#include "DIE.h" #include "DIEHash.h" +#include "DwarfCompileUnit.h" +#include "DwarfExpression.h" #include "DwarfUnit.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" @@ -490,9 +490,6 @@ void DwarfDebug::beginModule() { // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); - - // Prime section data. - SectionMap[Asm->getObjFileLowering().getTextSection()]; } void DwarfDebug::finishVariableDefinitions() { @@ -608,53 +605,6 @@ void DwarfDebug::finalizeModuleInfo() { SkeletonHolder.computeSizeAndOffsets(); } -void DwarfDebug::endSections() { - // Filter labels by section. - for (const SymbolCU &SCU : ArangeLabels) { - if (SCU.Sym->isInSection()) { - // Make a note of this symbol and it's section. - const MCSection *Section = &SCU.Sym->getSection(); - if (!Section->getKind().isMetadata()) - SectionMap[Section].push_back(SCU); - } else { - // Some symbols (e.g. common/bss on mach-o) can have no section but still - // appear in the output. This sucks as we rely on sections to build - // arange spans. We can do it without, but it's icky. - SectionMap[nullptr].push_back(SCU); - } - } - - // Build a list of sections used. - std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); - } - - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs. - std::sort(Sections.begin(), Sections.end(), SectionSort); - - // Add terminating symbols for each section. - for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { - const MCSection *Section = Sections[ID]; - MCSymbol *Sym = nullptr; - - if (Section) { - // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the - // resulting label may not be valid to use as a label. (section names can - // use a greater set of characters on some systems) - Sym = Asm->GetTempSymbol("debug_end", ID); - Asm->OutStreamer.SwitchSection(Section); - Asm->OutStreamer.EmitLabel(Sym); - } - - // Insert a final terminator. - SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); - } -} - // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { assert(CurFn == nullptr); @@ -666,10 +616,6 @@ void DwarfDebug::endModule() { if (!DwarfInfoSectionSym) return; - // End any existing sections. - // TODO: Does this need to happen? - endSections(); - // Finalize the debug info for the module. 
   finalizeModuleInfo();
@@ -783,10 +729,9 @@ void DwarfDebug::collectVariableInfoFromMMITable(
     DIVariable DV(VI.Var);
     DIExpression Expr(VI.Expr);
     ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
-    ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this));
-    DbgVariable *RegVar = ConcreteVariables.back().get();
-    RegVar->setFrameIndex(VI.Slot);
-    InfoHolder.addScopeVariable(Scope, RegVar);
+    auto RegVar = make_unique<DbgVariable>(DV, Expr, this, VI.Slot);
+    if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
+      ConcreteVariables.push_back(std::move(RegVar));
   }
 }
@@ -818,12 +763,12 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
 
 /// Determine whether two variable pieces overlap.
 static bool piecesOverlap(DIExpression P1, DIExpression P2) {
-  if (!P1.isVariablePiece() || !P2.isVariablePiece())
+  if (!P1.isBitPiece() || !P2.isBitPiece())
     return true;
-  unsigned l1 = P1.getPieceOffset();
-  unsigned l2 = P2.getPieceOffset();
-  unsigned r1 = l1 + P1.getPieceSize();
-  unsigned r2 = l2 + P2.getPieceSize();
+  unsigned l1 = P1.getBitPieceOffset();
+  unsigned l2 = P2.getBitPieceOffset();
+  unsigned r1 = l1 + P1.getBitPieceSize();
+  unsigned r2 = l2 + P2.getBitPieceSize();
   // True where [l1,r1[ and [l2,r2[ overlap.
   return (l1 < r2) && (l2 < r1);
 }
@@ -842,7 +787,8 @@ static bool piecesOverlap(DIExpression P1, DIExpression P2) {
 // 1 | |    [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
 // 2 | |    ...
 // 3   |    [clobber reg0]
-// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+//                                     x.
 //
 // Output:
 //
@@ -894,7 +840,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
     bool couldMerge = false;
 
     // If this is a piece, it may belong to the current DebugLocEntry.
-    if (DIExpr.isVariablePiece()) {
+    if (DIExpr.isBitPiece()) {
       // Add this value to the list of open ranges.
       OpenRanges.push_back(Value);
 
@@ -950,11 +896,9 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
       continue;
 
     LexicalScope *Scope = nullptr;
-    if (MDNode *IA = DV.getInlinedAt()) {
-      DebugLoc DL = DebugLoc::getFromDILocation(IA);
-      Scope = LScopes.findInlinedScope(DebugLoc::get(
-          DL.getLine(), DL.getCol(), DV.getContext(), IA));
-    } else
+    if (MDNode *IA = DV.getInlinedAt())
+      Scope = LScopes.findInlinedScope(DV.getContext(), IA);
+    else
       Scope = LScopes.findLexicalScope(DV.getContext());
     // If variable scope is not found then skip this variable.
     if (!Scope)
@@ -1026,8 +970,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     if (DL == PrologEndLoc) {
       Flags |= DWARF2_FLAG_PROLOGUE_END;
       PrologEndLoc = DebugLoc();
+      Flags |= DWARF2_FLAG_IS_STMT;
     }
-    if (PrologEndLoc.isUnknown())
+    if (DL.getLine() !=
+        Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
       Flags |= DWARF2_FLAG_IS_STMT;
 
     if (!DL.isUnknown()) {
@@ -1117,8 +1063,12 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
   for (const auto &MBB : *MF)
     for (const auto &MI : MBB)
       if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
-          !MI.getDebugLoc().isUnknown())
+          !MI.getDebugLoc().isUnknown()) {
+        // Did the target forget to set the FrameSetup flag for CFI insns?
+ assert(!MI.isCFIInstruction() && + "First non-frame-setup instruction is a CFI instruction."); return MI.getDebugLoc(); + } return DebugLoc(); } @@ -1172,7 +1122,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { Asm->OutStreamer.EmitLabel(FunctionBeginSym); // Calculate history for local variables. - calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(), + calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues); // Request labels for the full history. @@ -1187,7 +1137,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; - if (Ranges.front().first->getDebugExpression().isVariablePiece()) { + if (Ranges.front().first->getDebugExpression().isBitPiece()) { // Mark all non-overlapping initial pieces. for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { DIExpression Piece = I->first->getDebugExpression(); @@ -1217,12 +1167,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!PrologEndLoc.isUnknown()) { DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); - recordSourceLine( - FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + DWARF2_FLAG_IS_STMT); } } @@ -1350,8 +1300,8 @@ void DwarfDebug::emitSectionLabels() { if (useSplitDwarf()) { DwarfInfoDWOSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); - DwarfTypesDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); + DwarfTypesDWOSectionSym = emitSectionSym( + Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); } DwarfAbbrevSectionSym = emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); @@ -1553,7 +1503,6 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, return dwarf::GIEK_TYPE; case dwarf::DW_TAG_subprogram: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); - case dwarf::DW_TAG_constant: case dwarf::DW_TAG_variable: return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); case dwarf::DW_TAG_enumerator: @@ -1656,7 +1605,7 @@ void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, const DITypeIdentifierMap &Map, ArrayRef<DebugLocEntry::Value> Values) { assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isVariablePiece(); + return P.isBitPiece(); }) && "all values are expected to be pieces"); assert(std::is_sorted(Values.begin(), Values.end()) && "pieces are expected to be sorted"); @@ -1664,35 +1613,25 @@ void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, unsigned Offset = 0; for (auto Piece : Values) { DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getPieceOffset(); - unsigned PieceSize = Expr.getPieceSize(); + unsigned PieceOffset = Expr.getBitPieceOffset(); + unsigned PieceSize = Expr.getBitPieceSize(); assert(Offset <= PieceOffset && 
"overlapping or duplicate pieces"); if (Offset < PieceOffset) { // The DWARF spec seriously mandates pieces with no locations for gaps. - Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*8); + Asm->EmitDwarfOpPiece(Streamer, PieceOffset-Offset); Offset += PieceOffset-Offset; } - Offset += PieceSize; - const unsigned SizeOfByte = 8; #ifndef NDEBUG DIVariable Var = Piece.getVariable(); - assert(!Var.isIndirect() && "indirect address for piece"); unsigned VarSize = Var.getSizeInBits(Map); - assert(PieceSize+PieceOffset <= VarSize/SizeOfByte + assert(PieceSize+PieceOffset <= VarSize && "piece is larger than or outside of variable"); - assert(PieceSize*SizeOfByte != VarSize + assert(PieceSize != VarSize && "piece covers entire variable"); #endif - if (Piece.isLocation() && Piece.getLoc().isReg()) - Asm->EmitDwarfRegOpPiece(Streamer, - Piece.getLoc(), - PieceSize*SizeOfByte); - else { - emitDebugLocValue(Streamer, Piece); - Asm->EmitDwarfOpPiece(Streamer, PieceSize*SizeOfByte); - } + emitDebugLocValue(Streamer, Piece, PieceOffset); } } @@ -1700,7 +1639,7 @@ void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry) { const DebugLocEntry::Value Value = Entry.getValues()[0]; - if (Value.isVariablePiece()) + if (Value.isBitPiece()) // Emit all pieces that belong to the same variable and range. return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); @@ -1709,62 +1648,33 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, } void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value) { + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { DIVariable DV = Value.getVariable(); + DebugLocDwarfExpression DwarfExpr(*Asm, Streamer); + // Regular entry. if (Value.isInt()) { DIBasicType BTy(resolve(DV.getType())); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || - BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { - Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts"); - Streamer.EmitSLEB128(Value.getInt()); - } else { - Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); - Streamer.EmitULEB128(Value.getInt()); - } + BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + DwarfExpr.AddSignedConstant(Value.getInt()); + else + DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); DIExpression Expr = Value.getExpression(); - if (!Expr) + if (!Expr || (Expr.getNumElements() == 0)) // Regular entry. - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); + Asm->EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. - unsigned N = Expr.getNumElements(); - unsigned i = 0; - if (N >= 2 && Expr.getElement(0) == dwarf::DW_OP_plus) { - if (Loc.getOffset()) { - i = 2; - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); - Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); - Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); - Streamer.EmitSLEB128(Expr.getElement(1)); - } else { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation TLoc(Loc.getReg(), Expr.getElement(1)); - Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect()); - i = 2; - } - } else { - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); - } - - // Emit remaining complex address elements. 
- for (; i < N; ++i) { - uint64_t Element = Expr.getElement(i); - if (Element == dwarf::DW_OP_plus) { - Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); - Streamer.EmitULEB128(Expr.getElement(++i)); - } else if (Element == dwarf::DW_OP_deref) { - if (!Loc.isReg()) - Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); - } else if (Element == dwarf::DW_OP_piece) { - i += 3; - // handled in emitDebugLocEntry. - } else - llvm_unreachable("unknown Opcode found in complex address"); - } + if (Loc.getOffset()) { + DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); + DwarfExpr.AddExpression(Expr.begin(), Expr.end(), PieceOffsetInBits); + } else + DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), + PieceOffsetInBits); } } // else ... ignore constant fp. There is not any good way to @@ -1841,13 +1751,26 @@ struct ArangeSpan { // Emit a debug aranges section, containing a CU lookup for any // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { - // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + // Provides a unique id per text section. + DenseMap<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap; - typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType; + // Prime section data. + SectionMap[Asm->getObjFileLowering().getTextSection()]; - SpansType Spans; + // Filter labels by section. + for (const SymbolCU &SCU : ArangeLabels) { + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and it's section. + const MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[nullptr].push_back(SCU); + } + } // Build a list of sections used. std::vector<const MCSection *> Sections; @@ -1860,12 +1783,45 @@ void DwarfDebug::emitDebugARanges() { // This is only done to ensure consistent output order across different runs. std::sort(Sections.begin(), Sections.end(), SectionSort); - // Build a set of address spans, sorted by CU. + // Add terminating symbols for each section. + for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { + const MCSection *Section = Sections[ID]; + MCSymbol *Sym = nullptr; + + if (Section) { + // We can't call MCSection::getLabelEndName, as it's only safe to do so + // if we know the section name up-front. For user-created sections, the + // resulting label may not be valid to use as a label. (section names can + // use a greater set of characters on some systems) + Sym = Asm->GetTempSymbol("debug_end", ID); + Asm->OutStreamer.SwitchSection(Section); + Asm->OutStreamer.EmitLabel(Sym); + } + + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); + } + + DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; + for (const MCSection *Section : Sections) { SmallVector<SymbolCU, 8> &List = SectionMap[Section]; if (List.size() < 2) continue; + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (!Section) { + for (const SymbolCU &Cur : List) { + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = nullptr; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + continue; + } + // Sort the symbols by offset within the section. 
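// (How the spans fall out once this list is sorted; hypothetical labels.
// The null-CU terminator added above exists only to flush the last open
// span, and a span closes exactly when the owning CU changes between
// neighboring labels in the sweep below:
//
//   sorted labels:  (L0,CU1) (L1,CU1) (L2,CU2) (Lend,null)
//   emitted spans:  CU1 -> [L0,L2)    CU2 -> [L2,Lend) )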
std::sort(List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { @@ -1881,35 +1837,27 @@ void DwarfDebug::emitDebugARanges() { return IA < IB; }); - // If we have no section (e.g. common), just write out - // individual spans for each symbol. - if (!Section) { - for (const SymbolCU &Cur : List) { + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1, e = List.size(); n < e; n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { ArangeSpan Span; - Span.Start = Cur.Sym; - Span.End = nullptr; - if (Cur.CU) - Spans[Cur.CU].push_back(Span); - } - } else { - // Build spans between each label. - const MCSymbol *StartSym = List[0].Sym; - for (size_t n = 1, e = List.size(); n < e; n++) { - const SymbolCU &Prev = List[n - 1]; - const SymbolCU &Cur = List[n]; - - // Try and build the longest span we can within the same CU. - if (Cur.CU != Prev.CU) { - ArangeSpan Span; - Span.Start = StartSym; - Span.End = Cur.Sym; - Spans[Prev.CU].push_back(Span); - StartSym = Cur.Sym; - } + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; } } } + // Start the dwarf aranges section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); // Build a list of CUs used. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 48c2809..1c0e163 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,26 +14,25 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H -#include "DwarfFile.h" #include "AsmPrinterHandler.h" -#include "DIE.h" #include "DbgValueHistoryCalculator.h" #include "DebugLocEntry.h" #include "DebugLocList.h" #include "DwarfAccelTable.h" +#include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Allocator.h" - #include <memory> namespace llvm { @@ -68,41 +67,67 @@ public: //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. +/// +/// - Variables whose location changes over time have a DotDebugLocOffset and +/// the other fields are not used. +/// +/// - Variables that are described by multiple MMI table entries have multiple +/// expressions and frame indices. class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIExpression Expr; // Complex address location expression. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. - const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. - int FrameIndex; + DIVariable Var; /// Variable Descriptor. + SmallVector<DIExpression, 1> Expr; /// Complex address location expression. + DIE *TheDIE; /// Variable DIE. + unsigned DotDebugLocOffset; /// Offset in DotDebugLocEntries. 
+ const MachineInstr *MInsn; /// DBG_VALUE instruction of the variable. + SmallVector<int, 1> FrameIndex; /// Frame index of the variable. DwarfDebug *DD; public: /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD) - : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U), - MInsn(nullptr), FrameIndex(~0), DD(DD) { - assert(Var.Verify() && Expr.Verify()); + DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD, int FI = ~0) + : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), + MInsn(nullptr), DD(DD) { + FrameIndex.push_back(FI); + assert(Var.Verify() && E.Verify()); } /// Construct a DbgVariable from a DEBUG_VALUE. /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) - : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()), - TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), - FrameIndex(~0), DD(DD) {} + : Var(DbgValue->getDebugVariable()), + Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr), + DotDebugLocOffset(~0U), MInsn(DbgValue), DD(DD) { + FrameIndex.push_back(~0); + } // Accessors. DIVariable getVariable() const { return Var; } - DIExpression getExpression() const { return Expr; } + const ArrayRef<DIExpression> getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } StringRef getName() const { return Var.getName(); } const MachineInstr *getMInsn() const { return MInsn; } - int getFrameIndex() const { return FrameIndex; } - void setFrameIndex(int FI) { FrameIndex = FI; } + const ArrayRef<int> getFrameIndex() const { return FrameIndex; } + + void addMMIEntry(const DbgVariable &V) { + assert( DotDebugLocOffset == ~0U && !MInsn && "not an MMI entry"); + assert(V.DotDebugLocOffset == ~0U && !V.MInsn && "not an MMI entry"); + assert(V.Var == Var && "conflicting DIVariable"); + + if (V.getFrameIndex().back() != ~0) { + auto E = V.getExpression(); + auto FI = V.getFrameIndex(); + Expr.append(E.begin(), E.end()); + FrameIndex.append(FI.begin(), FI.end()); + } + assert(Expr.size() > 1 + ? std::all_of(Expr.begin(), Expr.end(), + [](DIExpression &E) { return E.isBitPiece(); }) + : (true && "conflicting locations for variable")); + } + // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) @@ -129,14 +154,11 @@ public: bool variableHasComplexAddress() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Expr.getNumElements() > 0; + assert(Expr.size() == 1 && + "variableHasComplexAddress() invoked on multi-FI variable"); + return Expr.back().getNumElements() > 0; } bool isBlockByrefVariable() const; - unsigned getNumAddrElements() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Expr.getNumElements(); - } - uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); } DIType getType() const; private: @@ -179,10 +201,6 @@ class DwarfDebug : public AsmPrinterHandler { // Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; - // Provides a unique id per text section. - typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; - SectionMapType SectionMap; - LexicalScopes LScopes; // Collection of abstract variables. 
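
A stripped-down model of the addMMIEntry merge above; MMIVar and its fields are invented for the sketch and only mirror the vector-append plus the all-bit-piece invariant of the real DbgVariable:

#include <cassert>
#include <vector>

struct MMIVar {
  std::vector<bool> ExprIsBitPiece; // one flag per location expression
  std::vector<int> FrameIndex;      // one frame slot per expression

  void addMMIEntry(const MMIVar &V) {
    ExprIsBitPiece.insert(ExprIsBitPiece.end(), V.ExprIsBitPiece.begin(),
                          V.ExprIsBitPiece.end());
    FrameIndex.insert(FrameIndex.end(), V.FrameIndex.begin(),
                      V.FrameIndex.end());
    // Several frame slots for one variable only make sense if every
    // expression describes a distinct piece of that variable.
    if (ExprIsBitPiece.size() > 1)
      for (bool IsPiece : ExprIsBitPiece)
        assert(IsPiece && "conflicting locations for variable");
  }
};
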
@@ -259,7 +277,8 @@ class DwarfDebug : public AsmPrinterHandler { // them. DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; - SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> TypeUnitsUnderConstruction; + SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1> + TypeUnitsUnderConstruction; // Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; @@ -348,10 +367,6 @@ class DwarfDebug : public AsmPrinterHandler { /// processed. void finalizeModuleInfo(); - /// \brief Emit labels to close any remaining sections that have been left - /// open. - void endSections(); - /// \brief Emit the debug info section. void emitDebugInfo(); @@ -565,7 +580,8 @@ public: void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); /// \brief emit a single value for the debug loc section. void emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value); + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits = 0); /// Emits an optimal (=sorted) sequence of DW_OP_pieces. void emitLocPieces(ByteStreamer &Streamer, const DITypeIdentifierMap &Map, diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp new file mode 100644 index 0000000..fcab067 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -0,0 +1,269 @@ +//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfExpression.h" +#include "DwarfDebug.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +const TargetRegisterInfo *DwarfExpression::getTRI() const { + return AP.TM.getSubtargetImpl()->getRegisterInfo(); +} + +unsigned DwarfExpression::getDwarfVersion() const { + return AP.getDwarfDebug()->getDwarfVersion(); +} + +void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + if (DwarfReg < 32) { + EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + } else { + EmitOp(dwarf::DW_OP_regx, Comment); + EmitUnsigned(DwarfReg); + } +} + +void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + if (DwarfReg < 32) { + EmitOp(dwarf::DW_OP_breg0 + DwarfReg); + } else { + EmitOp(dwarf::DW_OP_bregx); + EmitUnsigned(DwarfReg); + } + EmitSigned(Offset); + if (Deref) + EmitOp(dwarf::DW_OP_deref); +} + +void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { + assert(SizeInBits > 0 && "piece has size zero"); + const unsigned SizeOfByte = 8; + if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { + EmitOp(dwarf::DW_OP_bit_piece); + EmitUnsigned(SizeInBits); + EmitUnsigned(OffsetInBits); + } else { + EmitOp(dwarf::DW_OP_piece); + unsigned ByteSize = SizeInBits / SizeOfByte; + EmitUnsigned(ByteSize); + } +} + +void DwarfExpression::AddShr(unsigned ShiftBy) { + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(ShiftBy); + EmitOp(dwarf::DW_OP_shr); +} + +bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { + int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + if (isFrameRegister(MachineReg)) { + // If variable offset is based in frame register then use fbreg. + EmitOp(dwarf::DW_OP_fbreg); + EmitSigned(Offset); + } else { + AddRegIndirect(DwarfReg, Offset); + } + return true; +} + +bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) { + const TargetRegisterInfo *TRI = getTRI(); + if (!TRI->isPhysicalRegister(MachineReg)) + return false; + + int Reg = TRI->getDwarfRegNum(MachineReg, false); + + // If this is a valid register number, emit it. + if (Reg >= 0) { + AddReg(Reg); + if (PieceSizeInBits) + AddOpPiece(PieceSizeInBits, PieceOffsetInBits); + return true; + } + + // Walk up the super-register chain until we find a valid number. + // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + if (Reg >= 0) { + unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI->getSubRegIdxSize(Idx); + unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); + AddReg(Reg, "super-register"); + if (PieceOffsetInBits == RegOffset) { + AddOpPiece(Size, RegOffset); + } else { + // If this is part of a variable in a sub-register at a + // non-zero offset, we need to manually shift the value into + // place, since the DW_OP_piece describes the part of the + // variable, not the position of the subregister. 
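
The hunk picks up again with the shift emission just below. As an aside, the operator-selection rules of AddReg and AddOpPiece above reduce to two branches each; a minimal sketch that prints mnemonics instead of emitting bytes (DW_OP_reg0..reg31 are the compact one-byte encodings 0x50..0x6f):

#include <cstdio>

void addReg(int DwarfReg) {
  if (DwarfReg < 32)
    std::printf("DW_OP_reg%d\n", DwarfReg);   // compact one-byte form
  else
    std::printf("DW_OP_regx %d\n", DwarfReg); // ULEB128 register operand
}

void addOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0) {
  // The bit-granular operator is only needed for sub-byte sizes or
  // non-zero offsets; otherwise the byte-sized DW_OP_piece suffices.
  if (OffsetInBits > 0 || SizeInBits % 8)
    std::printf("DW_OP_bit_piece %u %u\n", SizeInBits, OffsetInBits);
  else
    std::printf("DW_OP_piece %u\n", SizeInBits / 8);
}
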
+        if (RegOffset)
+          AddShr(RegOffset);
+        AddOpPiece(Size, PieceOffsetInBits);
+      }
+      return true;
+    }
+  }
+
+  // Otherwise, attempt to find a covering set of sub-register numbers.
+  // For example, Q0 on ARM is a composition of D0+D1.
+  //
+  // Keep track of the current position so we can emit the more
+  // efficient DW_OP_piece.
+  unsigned CurPos = PieceOffsetInBits;
+  // The size of the register in bits, assuming 8 bits per byte.
+  unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+  // Keep track of the bits in the register we already emitted, so we
+  // can avoid emitting redundant aliasing subregs.
+  SmallBitVector Coverage(RegSize, false);
+  for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
+    unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR);
+    unsigned Size = TRI->getSubRegIdxSize(Idx);
+    unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+    Reg = TRI->getDwarfRegNum(*SR, false);
+
+    // Intersection between the bits we already emitted and the bits
+    // covered by this subregister.
+    SmallBitVector Intersection(RegSize, false);
+    Intersection.set(Offset, Offset + Size);
+    Intersection ^= Coverage;
+
+    // If this sub-register has a DWARF number and we haven't covered
+    // its range, emit a DWARF piece for it.
+    if (Reg >= 0 && Intersection.any()) {
+      AddReg(Reg, "sub-register");
+      AddOpPiece(Size, Offset == CurPos ? 0 : Offset);
+      CurPos = Offset + Size;
+
+      // Mark it as emitted.
+      Coverage.set(Offset, Offset + Size);
+    }
+  }
+
+  return CurPos > PieceOffsetInBits;
+}
+
+void DwarfExpression::AddSignedConstant(int Value) {
+  EmitOp(dwarf::DW_OP_consts);
+  EmitSigned(Value);
+  // The proper way to describe a constant value is
+  // DW_OP_constu <const>, DW_OP_stack_value.
+  // Unfortunately, DW_OP_stack_value was not available until DWARF-4,
+  // so we will continue to generate DW_OP_constu <const> for DWARF-2
+  // and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
+  // actually describes a value at a constant address, not a constant value.
+  // However, in the past there was no better way to describe a constant
+  // value, so the producers and consumers started to rely on heuristics
+  // to disambiguate the value vs. location status of the expression.
+  // See PR21176 for more details.
+  if (getDwarfVersion() >= 4)
+    EmitOp(dwarf::DW_OP_stack_value);
+}
+
+void DwarfExpression::AddUnsignedConstant(unsigned Value) {
+  EmitOp(dwarf::DW_OP_constu);
+  EmitUnsigned(Value);
+  // cf. comment in DwarfExpression::AddSignedConstant().
+  if (getDwarfVersion() >= 4)
+    EmitOp(dwarf::DW_OP_stack_value);
+}
+
+static unsigned getOffsetOrZero(unsigned OffsetInBits,
+                                unsigned PieceOffsetInBits) {
+  if (OffsetInBits == PieceOffsetInBits)
+    return 0;
+  assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces");
+  return OffsetInBits;
+}
+
+bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
+                                              unsigned MachineReg,
+                                              unsigned PieceOffsetInBits) {
+  auto I = Expr.begin();
+  // Pattern-match combinations for which more efficient representations exist
+  // first.
+  if (I == Expr.end())
+    return AddMachineRegPiece(MachineReg);
+
+  bool ValidReg = false;
+  switch (*I) {
+  case dwarf::DW_OP_bit_piece: {
+    unsigned OffsetInBits = I->getArg(1);
+    unsigned SizeInBits = I->getArg(2);
+    // Piece always comes at the end of the expression.
+    return AddMachineRegPiece(MachineReg, SizeInBits,
+                              getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+  }
+  case dwarf::DW_OP_plus:
+    // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
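
The DW_OP_plus case continues in the hunk below. The version gate used by AddSignedConstant and AddUnsignedConstant above can be sketched on its own; this returns mnemonics rather than emitting, and is only a model of the rule, not LLVM's API:

#include <string>
#include <vector>

std::vector<std::string> describeConstant(int Value, unsigned DwarfVersion) {
  std::vector<std::string> Ops;
  Ops.push_back("DW_OP_consts " + std::to_string(Value));
  // DW_OP_stack_value exists only from DWARF 4 on; older versions emit the
  // bare constant and consumers fall back to heuristics (see PR21176).
  if (DwarfVersion >= 4)
    Ops.push_back("DW_OP_stack_value");
  return Ops;
}
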
+ if (I->getNext() == dwarf::DW_OP_deref) { + unsigned Offset = I->getArg(1); + ValidReg = AddMachineRegIndirect(MachineReg, Offset); + std::advance(I, 2); + break; + } else + ValidReg = AddMachineRegPiece(MachineReg); + case dwarf::DW_OP_deref: + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. + ValidReg = AddMachineRegIndirect(MachineReg); + ++I; + break; + default: + llvm_unreachable("unsupported operand"); + } + + if (!ValidReg) + return false; + + // Emit remaining elements of the expression. + AddExpression(I, Expr.end(), PieceOffsetInBits); + return true; +} + +void DwarfExpression::AddExpression(DIExpression::iterator I, + DIExpression::iterator E, + unsigned PieceOffsetInBits) { + for (; I != E; ++I) { + switch (*I) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(1); + unsigned SizeInBits = I->getArg(2); + AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + break; + } + case dwarf::DW_OP_plus: + EmitOp(dwarf::DW_OP_plus_uconst); + EmitUnsigned(I->getArg(1)); + break; + case dwarf::DW_OP_deref: + EmitOp(dwarf::DW_OP_deref); + break; + default: + llvm_unreachable("unhandled opcode found in DIExpression"); + } + } +} diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h new file mode 100644 index 0000000..b90b7b6 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -0,0 +1,133 @@ +//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H + +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class AsmPrinter; +class ByteStreamer; +class TargetRegisterInfo; +class DwarfUnit; +class DIELoc; + +/// Base class containing the logic for constructing DWARF expressions +/// independently of whether they are emitted into a DIE or into a .debug_loc +/// entry. +class DwarfExpression { +protected: + const AsmPrinter &AP; + // Various convenience accessors that extract things out of AsmPrinter. + const TargetRegisterInfo *getTRI() const; + unsigned getDwarfVersion() const; + +public: + DwarfExpression(const AsmPrinter &AP) : AP(AP) {} + virtual ~DwarfExpression() {} + + /// Output a dwarf operand and an optional assembler comment. + virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; + /// Emit a raw signed value. + virtual void EmitSigned(int Value) = 0; + /// Emit a raw unsigned value. + virtual void EmitUnsigned(unsigned Value) = 0; + /// Return whether the given machine register is the frame register in the + /// current function. + virtual bool isFrameRegister(unsigned MachineReg) = 0; + + /// Emit a dwarf register operation. + void AddReg(int DwarfReg, const char *Comment = nullptr); + /// Emit an (double-)indirect dwarf register operation. + void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false); + + /// Emit a dwarf register operation for describing + /// - a small value occupying only part of a register or + /// - a register representing only part of a value. 
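
The AddExpression loop above is a one-pass translation from generic expression operators to DWARF operators. A self-contained sketch, with GenericOp invented to stand in for DIExpression's (opcode, arguments) stream:

#include <cstdio>
#include <vector>

enum GenericOpKind { OpPlus, OpDeref, OpBitPiece };
struct GenericOp {
  GenericOpKind Kind;
  unsigned Addend = 0;       // OpPlus: constant addend
  unsigned SizeInBits = 0;   // OpBitPiece: piece size
  unsigned OffsetInBits = 0; // OpBitPiece: piece offset
};

void addExpression(const std::vector<GenericOp> &Expr) {
  for (const GenericOp &O : Expr) {
    switch (O.Kind) {
    case OpPlus: // fold the addend into a single DW_OP_plus_uconst
      std::printf("DW_OP_plus_uconst %u\n", O.Addend);
      break;
    case OpDeref:
      std::printf("DW_OP_deref\n");
      break;
    case OpBitPiece: // size first, then offset, both in bits
      std::printf("DW_OP_bit_piece %u %u\n", O.SizeInBits, O.OffsetInBits);
      break;
    }
  }
}
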
+  void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+  /// Emit a shift-right dwarf expression.
+  void AddShr(unsigned ShiftBy);
+
+  /// Emit an indirect dwarf register operation for the given machine register.
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegIndirect(unsigned MachineReg, int Offset = 0);
+
+  /// \brief Emit a partial DWARF register operation.
+  /// \param MachineReg        the register
+  /// \param PieceSizeInBits   size and
+  /// \param PieceOffsetInBits offset of the piece in bits, if this is one
+  ///                          piece of an aggregate value.
+  ///
+  /// If size and offset are zero an operation for the entire
+  /// register is emitted: Some targets do not provide a DWARF
+  /// register number for every register. If this is the case, this
+  /// function will attempt to emit a DWARF register by emitting a
+  /// piece of a super-register or by piecing together multiple
+  /// subregisters that alias the register.
+  ///
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
+                          unsigned PieceOffsetInBits = 0);
+
+  /// Emit a signed constant.
+  void AddSignedConstant(int Value);
+  /// Emit an unsigned constant.
+  void AddUnsignedConstant(unsigned Value);
+
+  /// Emit an entire DIExpression on top of a machine register location.
+  /// \param PieceOffsetInBits If this is one piece out of a fragmented
+  /// location, this is the offset of the piece inside the entire variable.
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegExpression(DIExpression Expr, unsigned MachineReg,
+                               unsigned PieceOffsetInBits = 0);
+  /// Emit the remaining operations of a DIExpression, starting at
+  /// iterator I.
+  /// \param PieceOffsetInBits If this is one piece out of a fragmented
+  /// location, this is the offset of the piece inside the entire variable.
+  void AddExpression(DIExpression::iterator I, DIExpression::iterator E,
+                     unsigned PieceOffsetInBits = 0);
+};
+
+/// DwarfExpression implementation for .debug_loc entries.
+class DebugLocDwarfExpression : public DwarfExpression {
+  ByteStreamer &BS;
+
+public:
+  DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS)
+      : DwarfExpression(AP), BS(BS) {}
+
+  void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+  void EmitSigned(int Value) override;
+  void EmitUnsigned(unsigned Value) override;
+  bool isFrameRegister(unsigned MachineReg) override;
+};
+
+/// DwarfExpression implementation for singular DW_AT_location.
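
The second sink, which writes into a DIE, follows in the hunk below. The design splits cleanly: all expression-building logic sits in the base class, and only the raw emission is virtual. A toy model of that split (opcode value and print targets are illustrative only):

#include <cstdint>
#include <cstdio>

class ExprBuilder {
public:
  virtual ~ExprBuilder() = default;
  virtual void EmitOp(uint8_t Op) = 0; // the only per-sink operation
  void AddDeref() { EmitOp(0x06); }    // DW_OP_deref; shared logic lives here
};

// Sink #1: a raw byte stream, as used for .debug_loc entries.
class StreamBuilder : public ExprBuilder {
  void EmitOp(uint8_t Op) override { std::printf("stream byte 0x%02x\n", Op); }
};

// Sink #2: attribute data on a DIE, as used for DW_AT_location.
class DIEBuilder : public ExprBuilder {
  void EmitOp(uint8_t Op) override {
    std::printf("DIE DW_FORM_data1 0x%02x\n", Op);
  }
};
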
+class DIEDwarfExpression : public DwarfExpression { + DwarfUnit &DU; + DIELoc &DIE; + +public: + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) + : DwarfExpression(AP), DU(DU), DIE(DIE) {} + + void EmitOp(uint8_t Op, const char *Comment = nullptr) override; + void EmitSigned(int Value) override; + void EmitUnsigned(unsigned Value) override; + bool isFrameRegister(unsigned MachineReg) override; +}; +} + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 50180ea..3988f0d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -8,13 +8,12 @@ //===----------------------------------------------------------------------===// #include "DwarfFile.h" - #include "DwarfDebug.h" #include "DwarfUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/LEB128.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { @@ -147,7 +146,7 @@ void DwarfFile::emitStrings(const MCSection *StrSection, StrPool.emit(*Asm, StrSection, OffsetSection); } -void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { +bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); // Variables with positive arg numbers are parameters. @@ -169,18 +168,17 @@ void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // A later indexed parameter has been found, insert immediately before it. if (CurNum > ArgNum) break; - // FIXME: There are still some cases where two inlined functions are - // conflated together (two calls to the same function at the same - // location (eg: via a macro, or without column info, etc)) and then - // their arguments are conflated as well. - assert((LS->getParent() || CurNum != ArgNum) && - "Duplicate argument for top level (non-inlined) function"); + if (CurNum == ArgNum) { + (*I)->addMMIEntry(*Var); + return false; + } ++I; } Vars.insert(I, Var); - return; + return true; } Vars.push_back(Var); + return true; } } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 9d64bfc..35bf33a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -10,17 +10,16 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H +#include "AddressPool.h" +#include "DwarfStringPool.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" -#include "AddressPool.h" -#include "DwarfStringPool.h" - -#include <vector> -#include <string> #include <memory> +#include <string> +#include <vector> namespace llvm { class AsmPrinter; @@ -96,7 +95,8 @@ public: /// \brief Returns the string pool. DwarfStringPool &getStringPool() { return StrPool; } - void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + /// \returns false if the variable was merged with a previous one. 
+ bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { return ScopeVariables; diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h index ab32c1b..63e3412 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -13,7 +13,6 @@ #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Allocator.h" - #include <utility> namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 919d9d2..b0c7d48 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "DwarfUnit.h" - #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" +#include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -43,6 +43,20 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, cl::desc("Generate DWARF4 type units."), cl::init(false)); +void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { + DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); +} +void DIEDwarfExpression::EmitSigned(int Value) { + DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); +} +void DIEDwarfExpression::EmitUnsigned(unsigned Value) { + DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); +} +bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { + return MachineReg == getTRI()->getFrameRegister(*AP.MF); +} + + /// Unit - Unit constructor. DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) @@ -116,6 +130,30 @@ int64_t DwarfUnit::getDefaultLowerBound() const { if (dwarf::DWARF_VERSION >= 4) return 1; break; + + // The languages below have valid values only if the DWARF version >= 5. + case dwarf::DW_LANG_OpenCL: + case dwarf::DW_LANG_Go: + case dwarf::DW_LANG_Haskell: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_OCaml: + case dwarf::DW_LANG_Rust: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_Swift: + case dwarf::DW_LANG_Dylan: + case dwarf::DW_LANG_C_plus_plus_14: + if (dwarf::DWARF_VERSION >= 5) + return 0; + break; + + case dwarf::DW_LANG_Modula3: + case dwarf::DW_LANG_Julia: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + if (dwarf::DWARF_VERSION >= 5) + return 1; + break; } return -1; @@ -399,85 +437,18 @@ void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { } /// addRegisterOp - Add register operand. -// FIXME: Ideally, this would share the implementation with -// AsmPrinter::EmitDwarfRegOpPiece. -void DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, +bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits, unsigned OffsetInBits) { - const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo(); - int DWReg = RI->getDwarfRegNum(Reg, false); - bool isSubRegister = DWReg < 0; - - unsigned Idx = 0; - - // Go up the super-register chain until we hit a valid dwarf register number. 
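
The language switch extended above encodes when DW_AT_lower_bound may be omitted: only if the language's default bound is defined for the DWARF version in use. A compact sketch, with language names standing in for the dwarf::DW_LANG_* constants:

#include <string>

// Returns the implicit array lower bound, or -1 when no default applies
// and the bound must be emitted explicitly.
int defaultLowerBound(const std::string &Lang, unsigned DwarfVersion) {
  if (Lang == "C" || Lang == "C++")
    return 0; // defined since DWARF 2
  if (Lang == "Fortran")
    return 1; // defined since DWARF 2
  if (Lang == "Go" || Lang == "Rust" || Lang == "Haskell")
    return DwarfVersion >= 5 ? 0 : -1; // defaults new in DWARF 5
  if (Lang == "Julia" || Lang == "Fortran03" || Lang == "Fortran08")
    return DwarfVersion >= 5 ? 1 : -1;
  return -1;
}
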
- for (MCSuperRegIterator SR(Reg, RI); SR.isValid() && DWReg < 0; ++SR) { - DWReg = RI->getDwarfRegNum(*SR, false); - if (DWReg >= 0) - Idx = RI->getSubRegIndex(*SR, Reg); - } - - if (DWReg < 0) { - DEBUG(dbgs() << "Invalid Dwarf register number.\n"); - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_nop); - return; - } - - // Emit register. - if (DWReg < 32) - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); - else { - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); - } - - // Emit mask. - bool isPiece = SizeInBits > 0; - if (isSubRegister || isPiece) { - const unsigned SizeOfByte = 8; - unsigned RegSizeInBits = RI->getSubRegIdxSize(Idx); - unsigned RegOffsetInBits = RI->getSubRegIdxOffset(Idx); - unsigned PieceSizeInBits = std::max(SizeInBits, RegSizeInBits); - unsigned PieceOffsetInBits = OffsetInBits ? OffsetInBits : RegOffsetInBits; - assert(RegSizeInBits >= SizeInBits && "register smaller than value"); - - if (RegOffsetInBits != PieceOffsetInBits) { - // Manually shift the value into place, since the DW_OP_piece - // describes the part of the variable, not the position of the - // subregister. - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(TheDie, dwarf::DW_FORM_data1, RegOffsetInBits); - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_shr); - } - - if (PieceOffsetInBits > 0 || PieceSizeInBits % SizeOfByte) { - assert(PieceSizeInBits > 0 && "piece has zero size"); - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); - addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits); - addUInt(TheDie, dwarf::DW_FORM_data1, PieceOffsetInBits); - } else { - assert(PieceSizeInBits > 0 && "piece has zero size"); - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece); - addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits/SizeOfByte); - } - } + DIEDwarfExpression Expr(*Asm, *this, TheDie); + Expr.AddMachineRegPiece(Reg, SizeInBits, OffsetInBits); + return true; } /// addRegisterOffset - Add register offset. -void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, +bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { - const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo(); - unsigned DWReg = RI->getDwarfRegNum(Reg, false); - const TargetRegisterInfo *TRI = Asm->TM.getSubtargetImpl()->getRegisterInfo(); - if (Reg == TRI->getFrameRegister(*Asm->MF)) - // If variable offset is based in frame register then use fbreg. - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - else if (DWReg < 32) - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); - else { - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); - } - addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); + DIEDwarfExpression Expr(*Asm, *this, TheDie); + return Expr.AddMachineRegIndirect(Reg, Offset); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -581,10 +552,14 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // variable's location. 
DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + bool validReg; if (Location.isReg()) - addRegisterOpPiece(*Loc, Location.getReg()); + validReg = addRegisterOpPiece(*Loc, Location.getReg()); else - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + + if (!validReg) + return; // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). @@ -622,13 +597,19 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { dwarf::Tag T = (dwarf::Tag)Ty.getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. + // (Pieces of) aggregate types that get hacked apart by SROA may also be + // represented by a constant. Encode them as unsigned bytes. // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed // here, but accept them for now due to a bug in SROA producing bogus // dbg.values. - if (T == dwarf::DW_TAG_pointer_type || + if (T == dwarf::DW_TAG_array_type || + T == dwarf::DW_TAG_class_type || + T == dwarf::DW_TAG_pointer_type || T == dwarf::DW_TAG_ptr_to_member_type || T == dwarf::DW_TAG_reference_type || - T == dwarf::DW_TAG_rvalue_reference_type) + T == dwarf::DW_TAG_rvalue_reference_type || + T == dwarf::DW_TAG_structure_type || + T == dwarf::DW_TAG_union_type) return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || @@ -649,11 +630,15 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean) && + Encoding == dwarf::DW_ATE_float || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + (Ty.getTag() == dwarf::DW_TAG_unspecified_type && + Ty.getName() == "decltype(nullptr)")) && "Unsupported encoding"); return (Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean); + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty.getTag() == dwarf::DW_TAG_unspecified_type); } /// If this type is derived from a base type then return base type size. @@ -667,10 +652,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); - // If this type is not derived from any type or the type is a declaration then - // take conservative approach. - if (!BaseType.isValid() || BaseType.isForwardDecl()) - return Ty.getSizeInBits(); + assert(BaseType.isValid() && "Unexpected invalid base type"); // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need @@ -977,7 +959,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addString(Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) 
- if (Size && Tag != dwarf::DW_TAG_pointer_type) + if (Size && Tag != dwarf::DW_TAG_pointer_type + && Tag != dwarf::DW_TAG_ptr_to_member_type) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) @@ -1110,6 +1093,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type + // inside C++ composite types to point to the base class with the vtable. DICompositeType ContainingType(resolve(CTy.getContainingType())); if (ContainingType) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, @@ -1187,10 +1172,10 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, addType(ParamDIE, resolve(VP.getType())); if (!VP.getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); - if (Value *Val = VP.getValue()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) + if (Metadata *Val = VP.getValue()) { + if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) addConstantValue(ParamDIE, CI, resolve(VP.getType())); - else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { + else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -1359,7 +1344,7 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, if (SP.isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); - if (unsigned isa = Asm->getISAEncoding()) { + if (unsigned isa = Asm->getISAEncoding(SP.getFunction())) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } @@ -1511,7 +1496,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { uint64_t FieldSize = getBaseTypeSize(DD, DT); uint64_t OffsetInBytes; - if (Size != FieldSize) { + if (FieldSize && Size != FieldSize) { // Handle bitfield, assume bytes are 8 bits. addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index f40c937..7a5e47d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -14,17 +14,17 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H -#include "DIE.h" #include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCDwarf.h" namespace llvm { @@ -138,6 +138,7 @@ public: } // Accessors. + AsmPrinter* getAsmPrinter() const { return Asm; } unsigned getUniqueID() const { return UniqueID; } uint16_t getLanguage() const { return CUNode.getLanguage(); } DICompileUnit getCUNode() const { return CUNode; } @@ -253,12 +254,16 @@ public: /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); - /// addRegisterOp - Add register operand. - void addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + /// \brief Add register operand. + /// \returns false if the register does not exist, e.g., because it was never + /// materialized. 
+ bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits = 0, unsigned OffsetInBits = 0); - /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); + /// \brief Add register offset. + /// \returns false if the register does not exist, e.g., because it was never + /// materialized. + bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); // FIXME: Should be reformulated in terms of addComplexAddress. /// addBlockByrefAddress - Start with the address based on the location diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 2bbffb3..4841814 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,7 +121,8 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; + int ValueForTypeID = + isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; @@ -195,9 +196,22 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { /// table. Entries must be ordered by try-range address. void EHStreamer:: computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const RangeMapType &PadMap, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions) { + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced so we can put them in the LSDA. + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + // The end label of the previous invoke or nounwind try-range. MCSymbol *LastLabel = nullptr; @@ -208,6 +222,8 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // Whether the last CallSite entry was for an invoke. bool PreviousIsInvoke = false; + bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + // Visit all instructions in order of address. for (const auto &MBB : *Asm->MF) { for (const auto &MI : MBB) { @@ -237,7 +253,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. - if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) { + if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) { CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; @@ -254,14 +270,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, CallSiteEntry Site = { BeginLabel, LastLabel, - LandingPad->LandingPadLabel, + LandingPad, FirstActions[P.PadIndex] }; // Try to merge with the previous call-site. 
SJLJ doesn't do this - if (PreviousIsInvoke && Asm->MAI->usesItaniumLSDAForExceptions()) { + if (PreviousIsInvoke && !IsSJLJ) { CallSiteEntry &Prev = CallSites.back(); - if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { + if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { // Extend the range of the previous entry. Prev.EndLabel = Site.EndLabel; continue; @@ -269,7 +285,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, } // Otherwise, create a new call-site. - if (Asm->MAI->usesItaniumLSDAForExceptions()) + if (!IsSJLJ) CallSites.push_back(Site); else { // SjLj EH must maintain the call sites in the order assigned @@ -287,7 +303,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) { + if (SawPotentiallyThrowing && !IsSJLJ) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } @@ -338,23 +354,9 @@ void EHStreamer::emitExceptionTable() { unsigned SizeActions = computeActionsTable(LandingPads, Actions, FirstActions); - // Invokes and nounwind calls have entries in PadMap (due to being bracketed - // by try-range labels when lowered). Ordinary calls do not, so appropriate - // try-ranges for them need be deduced when using DWARF exception handling. - RangeMapType PadMap; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } - } - // Compute the call-site table. SmallVector<CallSiteEntry, 64> CallSites; - computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); + computeCallSiteTable(CallSites, LandingPads, FirstActions); // Final tallies. @@ -519,8 +521,7 @@ void EHStreamer::emitExceptionTable() { Asm->EmitULEB128(S.Action); } } else { - // DWARF Exception handling - assert(Asm->MAI->usesItaniumLSDAForExceptions()); + // Itanium LSDA exception handling // The call-site table is a list of all call sites that may throw an // exception (including C++ 'throw' statements) in the procedure @@ -576,15 +577,15 @@ void EHStreamer::emitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. 
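
The merge step above, and the SjLj exception to it, fit in a few lines; Site is an invented stand-in for CallSiteEntry:

#include <vector>

struct Site { int Begin, End; const void *LPad; unsigned Action; };

// Coalesce adjacent try-ranges that share a landing pad and action.
// SjLj tables must keep one entry per call site, in dispatch order,
// so merging is disabled there.
void addSite(std::vector<Site> &Sites, Site S, bool IsSJLJ) {
  if (!IsSJLJ && !Sites.empty()) {
    Site &Prev = Sites.back();
    if (S.LPad == Prev.LPad && S.Action == Prev.Action) {
      Prev.End = S.End; // extend the previous range instead of appending
      return;
    }
  }
  Sites.push_back(S);
}
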
-      if (!S.PadLabel) {
+      if (!S.LPad) {
         if (VerboseAsm)
           Asm->OutStreamer.AddComment(" has no landing pad");
         Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
       } else {
         if (VerboseAsm)
           Asm->OutStreamer.AddComment(Twine(" jumps to ") +
-                                      S.PadLabel->getName());
-        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+                                      S.LPad->LandingPadLabel->getName());
+        Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4);
       }
 
       // Offset of the first associated action record, relative to the start of
@@ -681,7 +682,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
     unsigned TypeID = *I;
     if (VerboseAsm) {
       --Entry;
-      if (TypeID != 0)
+      if (isFilterEHSelector(TypeID))
         Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
     }
 
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 7e9549d..9b316ff 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -23,6 +23,8 @@ class MachineModuleInfo;
 class MachineInstr;
 class MachineFunction;
 class AsmPrinter;
+class MCSymbol;
+class MCSymbolRefExpr;
 
 template <typename T> class SmallVectorImpl;
 
@@ -60,11 +62,11 @@ protected:
   /// Structure describing an entry in the call-site table.
   struct CallSiteEntry {
     // The 'try-range' is BeginLabel .. EndLabel.
-    MCSymbol *BeginLabel; // zero indicates the start of the function.
-    MCSymbol *EndLabel;   // zero indicates the end of the function.
+    MCSymbol *BeginLabel; // Null indicates the start of the function.
+    MCSymbol *EndLabel;   // Null indicates the end of the function.
 
-    // The landing pad starts at PadLabel.
-    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+    // LPad contains the landing pad start labels.
+    const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
 
     unsigned Action;
   };
@@ -86,7 +88,6 @@ protected:
   /// form gaps in the table. Entries must be ordered by try-range address.
   void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
-                            const RangeMapType &PadMap,
                             const SmallVectorImpl<const LandingPadInfo *> &LPs,
                             const SmallVectorImpl<unsigned> &FirstActions);
 
@@ -113,6 +114,13 @@ protected:
 
   virtual void emitTypeInfos(unsigned TTypeEncoding);
 
+  // Helpers for identifying what kind of clause an EH typeid or selector
+  // corresponds to. Negative selectors are for filter clauses, the zero
+  // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; } + static bool isCleanupEHSelector(int Selector) { return Selector == 0; } + static bool isCatchEHSelector(int Selector) { return Selector > 0; } + public: EHStreamer(AsmPrinter *A); virtual ~EHStreamer(); diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 5bda5a9..97a3234 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -34,35 +34,35 @@ using namespace llvm; namespace { - class ErlangGCPrinter : public GCMetadataPrinter { - public: - void beginAssembly(AsmPrinter &AP) override; - void finishAssembly(AsmPrinter &AP) override; - }; - +class ErlangGCPrinter : public GCMetadataPrinter { +public: + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; } static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> -X("erlang", "erlang-compatible garbage collector"); - -void llvm::linkErlangGCPrinter() { } + X("erlang", "erlang-compatible garbage collector"); -void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { } +void llvm::linkErlangGCPrinter() {} -void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { +void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { MCStreamer &OS = AP.OutStreamer; - unsigned IntPtrSize = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); // Put this in a custom .note section. - AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() - .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, - SectionKind::getDataRel())); + AP.OutStreamer.SwitchSection( + AP.getObjFileLowering().getContext().getELFSection(".note.gc", + ELF::SHT_PROGBITS, 0)); // For each function... - for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { + for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + FI != IE; ++FI) { GCFunctionInfo &MD = **FI; - + if (MD.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; /** A compact GC layout. Emit this data structure: * * struct { @@ -88,7 +88,7 @@ void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { // Emit the address of the safe point. OS.AddComment("safe point address"); MCSymbol *Label = PI->Label; - AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); } // Stack information never change in safe points! Only print info from the @@ -101,8 +101,9 @@ void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { // Emit stack arity, i.e. the number of stacked arguments. unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; - unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? - MD.getFunction().arg_size() - RegisteredArgs : 0; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs + ? MD.getFunction().arg_size() - RegisteredArgs + : 0; OS.AddComment("stack arity"); AP.EmitInt16(StackArity); @@ -113,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { // And for each live root... for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), LE = MD.live_end(PI); - LI != LE; ++LI) { + LI != LE; ++LI) { // Emit live root's offset within the stack frame. 
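
Both GC printers rewritten above now iterate the module-level funcinfo list and must skip functions owned by a different collector; a minimal model of that filter (FuncInfo is invented for the sketch):

#include <string>
#include <vector>

struct FuncInfo { std::string StrategyName; /* per-function GC metadata */ };

std::vector<const FuncInfo *>
functionsFor(const std::vector<FuncInfo> &All, const std::string &Mine) {
  std::vector<const FuncInfo *> Out;
  for (const FuncInfo &FI : All)
    if (FI.StrategyName == Mine) // managed by this GC, not some other one
      Out.push_back(&FI);
  return Out;
}
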
OS.AddComment("stack index (offset / wordsize)"); AP.EmitInt16(LI->StackOffset / IntPtrSize); diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 6480d048..76d6a06 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -32,18 +32,17 @@ using namespace llvm; namespace { - class OcamlGCMetadataPrinter : public GCMetadataPrinter { - public: - void beginAssembly(AsmPrinter &AP) override; - void finishAssembly(AsmPrinter &AP) override; - }; - +class OcamlGCMetadataPrinter : public GCMetadataPrinter { +public: + void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; } static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> -Y("ocaml", "ocaml 3.10-compatible collector"); + Y("ocaml", "ocaml 3.10-compatible collector"); -void llvm::linkOcamlGCPrinter() { } +void llvm::linkOcamlGCPrinter() {} static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { const std::string &MId = M.getModuleIdentifier(); @@ -67,12 +66,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { AP.OutStreamer.EmitLabel(Sym); } -void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { +void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); - EmitCamlGlobal(getModule(), AP, "code_begin"); + EmitCamlGlobal(M, AP, "code_begin"); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); - EmitCamlGlobal(getModule(), AP, "data_begin"); + EmitCamlGlobal(M, AP, "data_begin"); } /// emitAssembly - Print the frametable. The ocaml frametable format is thus: @@ -91,47 +91,59 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if /// either condition is detected in a function which uses the GC. /// -void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); +void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); - EmitCamlGlobal(getModule(), AP, "code_end"); + EmitCamlGlobal(M, AP, "code_end"); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); - EmitCamlGlobal(getModule(), AP, "data_end"); + EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? AP.OutStreamer.EmitIntValue(0, IntPtrSize); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); - EmitCamlGlobal(getModule(), AP, "frametable"); + EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; - for (iterator I = begin(), IE = end(); I != IE; ++I) { + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + I != IE; ++I) { GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { NumDescriptors++; } } - if (NumDescriptors >= 1<<16) { + if (NumDescriptors >= 1 << 16) { // Very rude! 
report_fatal_error(" Too much descriptor for ocaml GC"); } AP.EmitInt16(NumDescriptors); AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); - for (iterator I = begin(), IE = end(); I != IE; ++I) { + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + I != IE; ++I) { GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; uint64_t FrameSize = FI.getFrameSize(); - if (FrameSize >= 1<<16) { + if (FrameSize >= 1 << 16) { // Very rude! report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " - "Frame size " + Twine(FrameSize) + ">= 65536.\n" - "(" + Twine(uintptr_t(&FI)) + ")"); + "Frame size " + + Twine(FrameSize) + ">= 65536.\n" + "(" + + Twine(uintptr_t(&FI)) + ")"); } AP.OutStreamer.AddComment("live roots for " + @@ -140,11 +152,12 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); - if (LiveCount >= 1<<16) { + if (LiveCount >= 1 << 16) { // Very rude! report_fatal_error("Function '" + FI.getFunction().getName() + "' is too large for the ocaml GC! " - "Live root count "+Twine(LiveCount)+" >= 65536."); + "Live root count " + + Twine(LiveCount) + " >= 65536."); } AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); @@ -152,12 +165,13 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { AP.EmitInt16(LiveCount); for (GCFunctionInfo::live_iterator K = FI.live_begin(J), - KE = FI.live_end(J); K != KE; ++K) { - if (K->StackOffset >= 1<<16) { + KE = FI.live_end(J); + K != KE; ++K) { + if (K->StackOffset >= 1 << 16) { // Very rude! report_fatal_error( - "GC root stack offset is outside of fixed stack frame and out " - "of range for ocaml GC!"); + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); } AP.EmitInt16(K->StackOffset); } diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 0f0ad75..2b03877 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -60,7 +60,7 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + const Function *Per = MF->getMMI().getPersonality(); shouldEmitPersonality = hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit && Per; @@ -99,9 +99,151 @@ void Win64Exception::endFunction(const MachineFunction *) { if (shouldEmitPersonality) { Asm->OutStreamer.PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer.EmitWinEHHandlerData(); - emitExceptionTable(); + + // Emit the tables appropriate to the personality function in use. If we + // don't recognize the personality, assume it uses an Itanium-style LSDA. + EHPersonality Per = MMI->getPersonalityType(); + if (Per == EHPersonality::MSVC_Win64SEH) + emitCSpecificHandlerTable(); + else + emitExceptionTable(); + Asm->OutStreamer.PopSection(); } Asm->OutStreamer.EmitWinCFIEndProc(); } + +const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { + return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); +} + +/// Emit the language-specific data that __C_specific_handler expects. 
This +/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning +/// up after faults with __try, __except, and __finally. The typeinfo values +/// are not really RTTI data, but pointers to filter functions that return an +/// integer (1, 0, or -1) indicating how to handle the exception. For __finally +/// blocks and other cleanups, the landing pad label is zero, and the filter +/// function is actually a cleanup handler with the same prototype. A catch-all +/// entry is modeled with a null filter function field and a non-zero landing +/// pad label. +/// +/// Possible filter function return values: +/// EXCEPTION_EXECUTE_HANDLER (1): +/// Jump to the landing pad label after cleanups. +/// EXCEPTION_CONTINUE_SEARCH (0): +/// Continue searching this table or continue unwinding. +/// EXCEPTION_CONTINUE_EXECUTION (-1): +/// Resume execution at the trapping PC. +/// +/// Inferred table structure: +/// struct Table { +/// int NumEntries; +/// struct Entry { +/// imagerel32 LabelStart; +/// imagerel32 LabelEnd; +/// imagerel32 FilterOrFinally; // One means catch-all. +/// imagerel32 LabelLPad; // Zero means __finally. +/// } Entries[NumEntries]; +/// }; +void Win64Exception::emitCSpecificHandlerTable() { + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + + // Simplifying assumptions for first implementation: + // - Cleanups are not implemented. + // - Filters are not implemented. + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + // Compute label ranges for call sites as we would for the Itanium LSDA, but + // use an all zero action table because we aren't using these actions. + SmallVector<unsigned, 64> FirstActions; + FirstActions.resize(LandingPads.size()); + SmallVector<CallSiteEntry, 64> CallSites; + computeCallSiteTable(CallSites, LandingPads, FirstActions); + + MCSymbol *EHFuncBeginSym = + Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + MCSymbol *EHFuncEndSym = + Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); + + // Emit the number of table entries. + unsigned NumEntries = 0; + for (const CallSiteEntry &CSE : CallSites) { + if (!CSE.LPad) + continue; // Ignore gaps. + for (int Selector : CSE.LPad->TypeIds) { + // Ignore C++ filter clauses in SEH. + // FIXME: Implement cleanup clauses. + if (isCatchEHSelector(Selector)) + ++NumEntries; + } + } + Asm->OutStreamer.EmitIntValue(NumEntries, 4); + + // Emit the four-label records for each call site entry. The table has to be + // sorted in layout order, and the call sites should already be sorted. + for (const CallSiteEntry &CSE : CallSites) { + // Ignore gaps. Unlike the Itanium model, unwinding through a frame without + // an EH table entry will propagate the exception rather than terminating + // the program. + if (!CSE.LPad) + continue; + const LandingPadInfo *LPad = CSE.LPad; + + // Compute the label range. We may reuse the function begin and end labels + // rather than forming new ones. + const MCExpr *Begin = + createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); + const MCExpr *End; + if (CSE.EndLabel) { + // The interval is half-open, so we have to add one to include the return + // address of the last invoke in the range. 
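
The layout inferred in the comment above, spelled out as a plain struct; this is a descriptive model of the bytes emitted, not a type the emitter actually instantiates:

#include <cstdint>

struct SEHTableEntry {
  uint32_t LabelStart;      // imagerel32: start of the try-range
  uint32_t LabelEnd;        // imagerel32: one past the end of the range
  uint32_t FilterOrFinally; // filter function; the literal 1 means catch-all
  uint32_t LabelLPad;       // landing pad; zero marks a __finally cleanup
};

struct SEHTable {
  int32_t NumEntries;
  // Followed by NumEntries SEHTableEntry records in the image.
};
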
+ End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), + MCConstantExpr::Create(1, Asm->OutContext), + Asm->OutContext); + } else { + End = createImageRel32(EHFuncEndSym); + } + + // These aren't really type info globals, they are actually pointers to + // filter functions ordered by selector. The zero selector is used for + // cleanups, so slot zero corresponds to selector 1. + const std::vector<const GlobalValue *> &SelectorToFilter = MMI->getTypeInfos(); + + // Do a parallel iteration across typeids and clause labels, skipping filter + // clauses. + size_t NextClauseLabel = 0; + for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) { + // AddLandingPadInfo stores the clauses in reverse, but there is a FIXME + // to change that. + int Selector = LPad->TypeIds[E - I - 1]; + + // Ignore C++ filter clauses in SEH. + // FIXME: Implement cleanup clauses. + if (!isCatchEHSelector(Selector)) + continue; + + Asm->OutStreamer.EmitValue(Begin, 4); + Asm->OutStreamer.EmitValue(End, 4); + if (isCatchEHSelector(Selector)) { + assert(unsigned(Selector - 1) < SelectorToFilter.size()); + const GlobalValue *TI = SelectorToFilter[Selector - 1]; + if (TI) // Emit the filter function pointer. + Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4); + else // Otherwise, this is a "catch i8* null", or catch all. + Asm->OutStreamer.EmitIntValue(1, 4); + } + MCSymbol *ClauseLabel = LPad->ClauseLabels[NextClauseLabel++]; + Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4); + } + } +} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h index 538e132..b2d5d1b 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -29,6 +29,10 @@ class Win64Exception : public EHStreamer { /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; + void emitCSpecificHandlerTable(); + + const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); + public: //===--------------------------------------------------------------------===// // Main entry points. 
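For reference, the table emitted above corresponds to the following plain-C++ sketch of the scope table that __C_specific_handler walks. This is illustrative only: the struct and field names are invented for exposition (they mirror the "Inferred table structure" comment in the patch) and are not the Microsoft CRT's actual declarations.

    #include <cstdint>

    // One record per guarded range, emitted in layout order.
    struct SEHScopeEntry {
      uint32_t BeginAddress;    // imagerel32 of the range's start label
      uint32_t EndAddress;      // imagerel32 of the end label + 1, so the
                                // return address of the last invoke in the
                                // range is included (see CreateAdd above)
      uint32_t FilterOrFinally; // imagerel32 of the filter or cleanup
                                // function; the literal 1 marks a catch-all
      uint32_t JumpTarget;      // imagerel32 of the landing pad; 0 marks a
                                // __finally/cleanup entry
    };

    struct SEHScopeTable {
      uint32_t NumEntries;
      SEHScopeEntry Entries[1]; // NumEntries records follow contiguously
    };

A filter reached through FilterOrFinally returns one of the three values enumerated in the comment above: EXCEPTION_EXECUTE_HANDLER (1) to unwind to JumpTarget, EXCEPTION_CONTINUE_SEARCH (0) to keep searching outward, or EXCEPTION_CONTINUE_EXECUTION (-1) to resume at the trapping PC.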
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 12f6bd7..4b64be0 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -31,10 +31,11 @@ using namespace llvm; namespace { class AtomicExpand: public FunctionPass { const TargetMachine *TM; + const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpand(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + : FunctionPass(ID), TM(TM), TLI(nullptr) { initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } @@ -67,9 +68,9 @@ FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { } bool AtomicExpand::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand()) + if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) return false; - auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); SmallVector<Instruction *, 1> AtomicInsts; @@ -91,7 +92,7 @@ bool AtomicExpand::runOnFunction(Function &F) { auto FenceOrdering = Monotonic; bool IsStore, IsLoad; - if (TargetLowering->getInsertFencesForAtomic()) { + if (TLI->getInsertFencesForAtomic()) { if (LI && isAtLeastAcquire(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); LI->setOrdering(Monotonic); @@ -107,9 +108,9 @@ bool AtomicExpand::runOnFunction(Function &F) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(Monotonic); IsStore = IsLoad = true; - } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && - (isAtLeastRelease(CASI->getSuccessOrdering()) || - isAtLeastAcquire(CASI->getSuccessOrdering()))) { + } else if (CASI && !TLI->hasLoadLinkedStoreConditional() && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by @@ -125,20 +126,19 @@ bool AtomicExpand::runOnFunction(Function &F) { } } - if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { + if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) { MadeChange |= expandAtomicLoad(LI); - } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { + } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) { MadeChange |= expandAtomicStore(SI); } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. 
- MadeChange |= (isIdempotentRMW(RMWI) && - simplifyIdempotentRMW(RMWI)) || - (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) && - expandAtomicRMW(RMWI)); - } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { + MadeChange |= + (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) || + (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI)); + } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } } @@ -149,13 +149,9 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, bool IsStore, bool IsLoad) { IRBuilder<> Builder(I); - auto LeadingFence = - TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( - Builder, Order, IsStore, IsLoad); + auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad); - auto TrailingFence = - TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( - Builder, Order, IsStore, IsLoad); + auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad); // The trailing fence is emitted before the instruction instead of after // because there is no easy way of setting Builder insertion point after // an instruction. So we must erase it from the BB, and insert it back @@ -171,16 +167,13 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, } bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { - if (TM->getSubtargetImpl() - ->getTargetLowering() - ->hasLoadLinkedStoreConditional()) + if (TLI->hasLoadLinkedStoreConditional()) return expandAtomicLoadToLL(LI); else return expandAtomicLoadToCmpXchg(LI); } bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); IRBuilder<> Builder(LI); // On some architectures, load-linked instructions are atomic for larger @@ -231,9 +224,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { } bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { - if (TM->getSubtargetImpl() - ->getTargetLowering() - ->hasLoadLinkedStoreConditional()) + if (TLI->hasLoadLinkedStoreConditional()) return expandAtomicRMWToLLSC(AI); else return expandAtomicRMWToCmpXchg(AI); @@ -277,7 +268,6 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering MemOpOrder = AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); @@ -397,7 +387,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { } bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); @@ -551,13 +540,10 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { } bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { - auto TLI = TM->getSubtargetImpl()->getTargetLowering(); - if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) expandAtomicLoad(ResultingLoad); return true; } - return false; } diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index b9b1fd8..82f5c48 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,633 +15,23 @@ /// 
//===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <utility> using namespace llvm; -static cl::opt<unsigned> -PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), - cl::desc("Threshold for partial unrolling"), cl::Hidden); - #define DEBUG_TYPE "basictti" -namespace { - -class BasicTTI final : public ImmutablePass, public TargetTransformInfo { - const TargetMachine *TM; - - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - - /// Estimate the cost overhead of SK_Alternate shuffle. - unsigned getAltShuffleOverhead(Type *Ty) const; - - const TargetLoweringBase *getTLI() const { - return TM->getSubtargetImpl()->getTargetLowering(); - } - -public: - BasicTTI() : ImmutablePass(ID), TM(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { - initializeBasicTTIPass(*PassRegistry::getPassRegistry()); - } - - void initializePass() override { - pushTTIStack(this); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); - } - - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. 
- void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo*)this; - return this; - } - - bool hasBranchDivergence() const override; - - /// \name Scalar TTI Implementations - /// @{ - - bool isLegalAddImmediate(int64_t imm) const override; - bool isLegalICmpImmediate(int64_t imm) const override; - bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const override; - int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const override; - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTypeLegal(Type *Ty) const override; - unsigned getJumpBufAlignment() const override; - unsigned getJumpBufSize() const override; - bool shouldBuildLookupTables() const override; - bool haveFastSqrt(Type *Ty) const override; - void getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const override; - - /// @} - - /// \name Vector TTI Implementations - /// @{ - - unsigned getNumberOfRegisters(bool Vector) const override; - unsigned getMaxInterleaveFactor() const override; - unsigned getRegisterBitWidth(bool Vector) const override; - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind, OperandValueProperties, - OperandValueProperties) const override; - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const override; - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const override; - unsigned getCFInstrCost(unsigned Opcode) const override; - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const override; - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const override; - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const override; - unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef<Type*> Tys) const override; - unsigned getNumberOfParts(Type *Tp) const override; - unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override; - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const override; - - /// @} -}; - -} - -INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", - "Target independent code generator's TTI", true, true, false) -char BasicTTI::ID = 0; - -ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { - return new BasicTTI(TM); -} - -bool BasicTTI::hasBranchDivergence() const { return false; } - -bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return getTLI()->isLegalAddImmediate(imm); -} - -bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return getTLI()->isLegalICmpImmediate(imm); -} - -bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - TargetLoweringBase::AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return getTLI()->isLegalAddressingMode(AM, Ty); -} - -int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, - int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) const { - TargetLoweringBase::AddrMode AM; - AM.BaseGV = BaseGV; - AM.BaseOffs = BaseOffset; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Scale; - return getTLI()->getScalingFactorCost(AM, Ty); -} - -bool BasicTTI::isTruncateFree(Type *Ty1, Type 
*Ty2) const { - return getTLI()->isTruncateFree(Ty1, Ty2); -} - -bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = getTLI()->getValueType(Ty); - return getTLI()->isTypeLegal(T); -} - -unsigned BasicTTI::getJumpBufAlignment() const { - return getTLI()->getJumpBufAlignment(); -} - -unsigned BasicTTI::getJumpBufSize() const { - return getTLI()->getJumpBufSize(); -} - -bool BasicTTI::shouldBuildLookupTables() const { - const TargetLoweringBase *TLI = getTLI(); - return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -bool BasicTTI::haveFastSqrt(Type *Ty) const { - const TargetLoweringBase *TLI = getTLI(); - EVT VT = TLI->getValueType(Ty); - return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); -} - -void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L, - UnrollingPreferences &UP) const { - // This unrolling functionality is target independent, but to provide some - // motivation for its intended use, for x86: - - // According to the Intel 64 and IA-32 Architectures Optimization Reference - // Manual, Intel Core models and later have a loop stream detector - // (and associated uop queue) that can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have no more than 4 (8 for Nehalem and later) branches - // taken, and none of them may be calls. - // - The loop can have no more than 18 (28 for Nehalem and later) uops. - - // According to the Software Optimization Guide for AMD Family 15h Processors, - // models 30h-4fh (Steamroller and later) have a loop predictor and loop - // buffer which can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have fewer than 16 branches - // - The loop must have less than 40 uops in all executed loop branches - - // The number of taken branches in a loop is hard to estimate here, and - // benchmarking has revealed that it is better not to be conservative when - // estimating the branch count. As a result, we'll ignore the branch limits - // until someone finds a case where it matters in practice. - - unsigned MaxOps; - const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F); - if (PartialUnrollingThreshold.getNumOccurrences() > 0) - MaxOps = PartialUnrollingThreshold; - else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) - MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; - else - return; - - // Scan the loop: don't unroll loops with calls. - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) - if (isa<CallInst>(J) || isa<InvokeInst>(J)) { - ImmutableCallSite CS(J); - if (const Function *F = CS.getCalledFunction()) { - if (!TopTTI->isLoweredToCall(F)) - continue; - } - - return; - } - } - - // Enable runtime and partial unrolling up to the specified size. - UP.Partial = UP.Runtime = true; - UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; -} - -//===----------------------------------------------------------------------===// -// -// Calls used by the vectorizers. 
-// -//===----------------------------------------------------------------------===// - -unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) const { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - -unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { - return 1; -} - -unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { - return 32; -} - -unsigned BasicTTI::getMaxInterleaveFactor() const { - return 1; -} - -unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, OperandValueKind, - OperandValueProperties, - OperandValueProperties) const { - // Check if any of the operands are vector operands. - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); - - bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); - // Assume that floating point arithmetic operations cost twice as much as - // integer operations. - unsigned OpCost = (IsFloat ? 2 : 1); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2 * OpCost; - } - - // Else, assume that we need to scalarize this op. - if (Ty->isVectorTy()) { - unsigned Num = Ty->getVectorNumElements(); - unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; - } - - // We don't know anything about this scalar instruction. - return OpCost; -} - -unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { - assert(Ty->isVectorTy() && "Can only shuffle vectors"); - unsigned Cost = 0; - // Shuffle cost is equal to the cost of extracting element from its argument - // plus the cost of inserting them onto the result vector. - - // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index - // 0 of first vector, index 1 of second vector,index 2 of first vector and - // finally index 3 of second vector and insert them at index <0,1,2,3> of - // result vector. 
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); - Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - return Cost; -} - -unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) const { - if (Kind == SK_Alternate) { - return getAltShuffleOverhead(Tp); - } - return 1; -} - -unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src); - std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst); - - // Check for NOOP conversions. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - } - - if (Opcode == Instruction::Trunc && - TLI->isTruncateFree(SrcLT.second, DstLT.second)) - return 0; - - if (Opcode == Instruction::ZExt && - TLI->isZExtFree(SrcLT.second, DstLT.second)) - return 0; - - // If the cast is marked as legal (or promote) then assume low cost. - if (SrcLT.first == DstLT.first && - TLI->isOperationLegalOrPromote(ISD, DstLT.second)) - return 1; - - // Handle scalar conversions. - if (!Src->isVectorTy() && !Dst->isVectorTy()) { - - // Scalar bitcasts are usually free. - if (Opcode == Instruction::BitCast) - return 0; - - // Just check the op cost. If the operation is legal then assume it costs 1. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return 1; - - // Assume that illegal scalar instruction are expensive. - return 4; - } - - // Check vector-to-vector casts. - if (Dst->isVectorTy() && Src->isVectorTy()) { - - // If the cast is between same-sized registers, then the check is simple. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Assume that Zext is done using AND. - if (Opcode == Instruction::ZExt) - return 1; - - // Assume that sext is done using SHL and SRA. - if (Opcode == Instruction::SExt) - return 2; - - // Just check the op cost. If the operation is legal then assume it costs - // 1 and multiply by the type-legalization overhead. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return SrcLT.first * 1; - } - - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. - unsigned Num = Dst->getVectorNumElements(); - unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); - - // Return the cost of multiple scalar invocation plus the cost of - // inserting and extracting the values. - return getScalarizationOverhead(Dst, true, true) + Num * Cost; - } - - // We already handled vector-to-vector and scalar-to-scalar conversions. This - // is where we handle bitcast between vectors and scalars. We need to assume - // that the conversion is scalarized in one way or another. - if (Opcode == Instruction::BitCast) - // Illegal bitcasts are done by storing and loading from a stack slot. - return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + - (Dst->isVectorTy()? 
getScalarizationOverhead(Dst, true, false):0); - - llvm_unreachable("Unhandled cast"); - } - -unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { - // Branches are assumed to be predicted. - return 0; -} - -unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - const TargetLoweringBase *TLI = getTLI(); - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - // Selects on vectors are actually vector selects. - if (ISD == ISD::SELECT) { - assert(CondTy && "CondTy must exist"); - if (CondTy->isVectorTy()) - ISD = ISD::VSELECT; - } - - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); - - if (!(ValTy->isVectorTy() && !LT.second.isVector()) && - !TLI->isOperationExpand(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. Multiply - // by the type-legalization overhead. - return LT.first * 1; - } - - // Otherwise, assume that the cast is scalarized. - if (ValTy->isVectorTy()) { - unsigned Num = ValTy->getVectorNumElements(); - if (CondTy) - CondTy = CondTy->getScalarType(); - unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), - CondTy); - - // Return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(ValTy, true, false) + Num * Cost; - } - - // Unknown scalar opcode. - return 1; -} - -unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); - - return LT.first; -} - -unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - assert(!Src->isVoidTy() && "Invalid type"); - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); - - // Assuming that all loads of legal types cost 1. - unsigned Cost = LT.first; - - if (Src->isVectorTy() && - Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { - // This is a vector load that legalizes to a larger type than the vector - // itself. Unless the corresponding extending load or truncating store is - // legal, then this will scalarize. - TargetLowering::LegalizeAction LA = TargetLowering::Expand; - EVT MemVT = getTLI()->getValueType(Src, true); - if (MemVT.isSimple() && MemVT != MVT::Other) { - if (Opcode == Instruction::Store) - LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); - else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT()); - } - - if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { - // This is a vector load/store for some illegal type that is scalarized. - // We must account for the cost of building or decomposing the vector. - Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, - Opcode == Instruction::Store); - } - } - - return Cost; -} - -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys) const { - unsigned ISD = 0; - switch (IID) { - default: { - // Assume that we need to scalarize this intrinsic. 
- unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - } - - return ScalarCalls + ScalarizationCost; - } - // Look for intrinsics that can be lowered directly or turned into a scalar - // intrinsic call. - case Intrinsic::sqrt: ISD = ISD::FSQRT; break; - case Intrinsic::sin: ISD = ISD::FSIN; break; - case Intrinsic::cos: ISD = ISD::FCOS; break; - case Intrinsic::exp: ISD = ISD::FEXP; break; - case Intrinsic::exp2: ISD = ISD::FEXP2; break; - case Intrinsic::log: ISD = ISD::FLOG; break; - case Intrinsic::log10: ISD = ISD::FLOG10; break; - case Intrinsic::log2: ISD = ISD::FLOG2; break; - case Intrinsic::fabs: ISD = ISD::FABS; break; - case Intrinsic::minnum: ISD = ISD::FMINNUM; break; - case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break; - case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; - case Intrinsic::floor: ISD = ISD::FFLOOR; break; - case Intrinsic::ceil: ISD = ISD::FCEIL; break; - case Intrinsic::trunc: ISD = ISD::FTRUNC; break; - case Intrinsic::nearbyint: - ISD = ISD::FNEARBYINT; break; - case Intrinsic::rint: ISD = ISD::FRINT; break; - case Intrinsic::round: ISD = ISD::FROUND; break; - case Intrinsic::pow: ISD = ISD::FPOW; break; - case Intrinsic::fma: ISD = ISD::FMA; break; - case Intrinsic::fmuladd: ISD = ISD::FMA; break; - // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return 0; - } - - const TargetLoweringBase *TLI = getTLI(); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); - - if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. - // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; - } - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // If the operation is custom lowered then assume - // thare the code is twice as expensive. - return LT.first * 2; - } - - // If we can't lower fmuladd into an FMA estimate the cost as a floating - // point mul followed by an add. - if (IID == Intrinsic::fmuladd) - return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + - TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); - - // Else, assume that we need to scalarize this intrinsic. For math builtins - // this will emit a costly libcall, adding call overhead and spills. Make it - // very expensive. - if (RetTy->isVectorTy()) { - unsigned Num = RetTy->getVectorNumElements(); - unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), - Tys); - return 10 * Cost * Num; - } - - // This is going to be turned into a library call, make it expensive. - return 10; -} - -unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); - return LT.first; -} - -unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { - return 0; -} +// This flag is used by the template base class for BasicTTIImpl, and here to +// provide a definition. 
+cl::opt<unsigned> + llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), + cl::Hidden); -unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwise) const { - assert(Ty->isVectorTy() && "Expect a vector type"); - unsigned NumVecElts = Ty->getVectorNumElements(); - unsigned NumReduxLevels = Log2_32(NumVecElts); - unsigned ArithCost = NumReduxLevels * - TopTTI->getArithmeticInstrCost(Opcode, Ty); - // Assume the pairwise shuffles add a cost. - unsigned ShuffleCost = - NumReduxLevels * (IsPairwise + 1) * - TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); - return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); -} +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 2128da1..b8f05cd 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -601,8 +601,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && + MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 092346b..f21d4d2 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen ForwardControlFlowIntegrity.cpp GCMetadata.cpp GCMetadataPrinter.cpp + GCRootLowering.cpp GCStrategy.cpp GlobalMerge.cpp IfConversion.cpp @@ -95,6 +96,7 @@ add_llvm_library(LLVMCodeGen ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp ShadowStackGC.cpp + ShadowStackGCLowering.cpp SjLjEHPrepare.cpp SlotIndexes.cpp SpillPlacement.cpp @@ -104,6 +106,7 @@ add_llvm_library(LLVMCodeGen StackSlotColoring.cpp StackMapLivenessAnalysis.cpp StackMaps.cpp + StatepointExampleGC.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp TargetInstrInfo.cpp @@ -115,6 +118,11 @@ add_llvm_library(LLVMCodeGen TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp + WinEHPrepare.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen + ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen/PBQP ) add_dependencies(LLVMCodeGen intrinsics_gen) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 56ecde0..034ffb3 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -14,9 +14,11 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -178,3 +180,70 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { llvm_unreachable(nullptr); } } + +static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { + if (VT.isVector()) + return true; // Assume -msse-regparm might be in effect. 
+ if (!VT.isInteger())
+ return false;
+ if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall)
+ return true;
+ return false;
+}
+
+void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
+ MVT VT, CCAssignFn Fn) {
+ unsigned SavedStackOffset = StackOffset;
+ unsigned NumLocs = Locs.size();
+
+ // Set the 'inreg' flag if it is used for this calling convention.
+ ISD::ArgFlagsTy Flags;
+ if (isValueTypeInRegForCC(CallingConv, VT))
+ Flags.setInReg();
+
+ // Allocate something of this value type repeatedly until we get assigned a
+ // location in memory.
+ bool HaveRegParm = true;
+ while (HaveRegParm) {
+ if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+ << " while computing remaining regparms\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ HaveRegParm = Locs.back().isRegLoc();
+ }
+
+ // Copy all the registers from the value locations we added.
+ assert(NumLocs < Locs.size() && "CC assignment failed to add location");
+ for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
+ if (Locs[I].isRegLoc())
+ Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
+
+ // Clear the assigned values and stack memory. We leave the registers marked
+ // as allocated so that future queries don't return the same registers, i.e.
+ // when i64 and f64 are both passed in GPRs.
+ StackOffset = SavedStackOffset;
+ Locs.resize(NumLocs);
+}
+
+void CCState::analyzeMustTailForwardedRegisters(
+ SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+ CCAssignFn Fn) {
+ // Oftentimes calling conventions will not use register parameters for
+ // variadic functions, so we need to assume we're not variadic so that we get
+ // all the registers that might be used in a non-variadic call.
+ SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+
+ for (MVT RegVT : RegParmTypes) {
+ SmallVector<MCPhysReg, 8> RemainingRegs;
+ getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
+ const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
+ const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
+ for (MCPhysReg PReg : RemainingRegs) {
+ unsigned VReg = MF.addLiveIn(PReg, RC);
+ Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
+ }
+ }
+}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 307dec5..7c0068e 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,7 +21,6 @@ using namespace llvm;
 /// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 8d20848..c0d7dca 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" @@ -30,20 +31,22 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -70,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); +static cl::opt<bool> + DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), + cl::desc("Disable GC optimizations in CodeGenPrepare")); + static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -90,6 +97,16 @@ static cl::opt<bool> StressStoreExtract( "stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); +static cl::opt<bool> DisableExtLdPromotion( + "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), + cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " + "CodeGenPrepare")); + +static cl::opt<bool> StressExtLdPromotion( + "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), + cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " + "optimization in CodeGenPrepare")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; struct TypeIsSExt { @@ -98,6 +115,7 @@ struct TypeIsSExt { TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {} }; typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; +class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -143,8 +161,8 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addRequired<TargetLibraryInfo>(); - AU.addRequired<TargetTransformInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } private: @@ -152,12 +170,12 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; bool 
EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); - bool OptimizeBlock(BasicBlock &BB); - bool OptimizeInst(Instruction *I); + bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); + bool OptimizeInst(Instruction *I, bool& ModifiedDT); bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); bool OptimizeInlineAsmInst(CallInst *CS); - bool OptimizeCallInst(CallInst *CI); - bool MoveExtToFormExtLoad(Instruction *I); + bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool MoveExtToFormExtLoad(Instruction *&I); bool OptimizeExtUses(Instruction *I); bool OptimizeSelectInst(SelectInst *SI); bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); @@ -165,6 +183,12 @@ typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); bool sinkAndCmp(Function &F); + bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, + Instruction *&Inst, + const SmallVectorImpl<Instruction *> &Exts, + unsigned CreatedInst); + bool splitBranchCondition(Function &F); + bool simplifyOffsetableRelocate(Instruction &I); }; } @@ -187,14 +211,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) { ModifiedDT = false; if (TM) - TLI = TM->getSubtargetImpl()->getTargetLowering(); - TLInfo = &getAnalysis<TargetLibraryInfo>(); - TTI = &getAnalysis<TargetTransformInfo>(); + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize); + OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -218,15 +241,23 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // into a single target instruction, push the mask and compare into branch // users. Do this before OptimizeBlock -> OptimizeInst -> // OptimizeCmpExpression, which perturbs the pattern being searched for. 
- if (!DisableBranchOpts) + if (!DisableBranchOpts) { EverMadeChange |= sinkAndCmp(F); + EverMadeChange |= splitBranchCondition(F); + } bool MadeChange = true; while (MadeChange) { MadeChange = false; for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = I++; - MadeChange |= OptimizeBlock(*BB); + bool ModifiedDTOnIteration = false; + MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + ModifiedDT |= ModifiedDTOnIteration; + if (ModifiedDTOnIteration) + break; } EverMadeChange |= MadeChange; } @@ -236,9 +267,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (!DisableBranchOpts) { MadeChange = false; SmallPtrSet<BasicBlock*, 8> WorkList; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); - MadeChange |= ConstantFoldTerminator(BB, true); + for (BasicBlock &BB : F) { + SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB)); + MadeChange |= ConstantFoldTerminator(&BB, true); if (!MadeChange) continue; for (SmallVectorImpl<BasicBlock*>::iterator @@ -272,6 +303,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) { EverMadeChange |= MadeChange; } + if (!DisableGCOpts) { + SmallVector<Instruction *, 2> Statepoints; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isStatepoint(I)) + Statepoints.push_back(&I); + for (auto &I : Statepoints) + EverMadeChange |= simplifyOffsetableRelocate(*I); + } + if (ModifiedDT && DT) DT->recalculate(F); @@ -300,7 +341,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // Remember if SinglePred was the entry block of the function. // If so, we will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, this); + MergeBasicBlockIntoOnlyPred(BB, DT); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -440,7 +481,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. 
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, this); + MergeBasicBlockIntoOnlyPred(DestBB, DT); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -495,6 +536,144 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +// Computes a map of base pointer relocation instructions to corresponding +// derived pointer relocation instructions given a vector of all relocate calls +static void computeBaseDerivedRelocateMap( + const SmallVectorImpl<User *> &AllRelocateCalls, + DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> & + RelocateInstMap) { + // Collect information in two maps: one primarily for locating the base object + // while filling the second map; the second map is the final structure holding + // a mapping between Base and corresponding Derived relocate calls + DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap; + for (auto &U : AllRelocateCalls) { + GCRelocateOperands ThisRelocate(U); + IntrinsicInst *I = cast<IntrinsicInst>(U); + auto K = std::make_pair(ThisRelocate.basePtrIndex(), + ThisRelocate.derivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, I)); + } + for (auto &Item : RelocateIdxMap) { + std::pair<unsigned, unsigned> Key = Item.first; + if (Key.first == Key.second) + // Base relocation: nothing to insert + continue; + + IntrinsicInst *I = Item.second; + auto BaseKey = std::make_pair(Key.first, Key.first); + IntrinsicInst *Base = RelocateIdxMap[BaseKey]; + if (!Base) + // TODO: We might want to insert a new base object relocate and gep off + // that, if there are enough derived object relocates. + continue; + RelocateInstMap[Base].push_back(I); + } +} + +// Accepts a GEP and extracts the operands into a vector provided they're all +// small integer constants +static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, + SmallVectorImpl<Value *> &OffsetV) { + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + // Only accept small constant integer operands + auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!Op || Op->getZExtValue() > 20) + return false; + } + + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + OffsetV.push_back(GEP->getOperand(i)); + return true; +} + +// Takes a RelocatedBase (base pointer relocation instruction) and Targets to +// replace, computes a replacement, and affects it. +static bool +simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, + const SmallVectorImpl<IntrinsicInst *> &Targets) { + bool MadeChange = false; + for (auto &ToReplace : Targets) { + GCRelocateOperands MasterRelocate(RelocatedBase); + GCRelocateOperands ThisRelocate(ToReplace); + + assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() && + "Not relocating a derived object of the original base object"); + if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) { + // A duplicate relocate call. TODO: coalesce duplicates. 
+ continue;
+ }
+
+ Value *Base = ThisRelocate.basePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ IRBuilder<> Builder(ToReplace);
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+ Value *Replacement =
+ Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
+ Instruction *ReplacementInst = cast<Instruction>(Replacement);
+ ReplacementInst->removeFromParent();
+ ReplacementInst->insertAfter(RelocatedBase);
+ Replacement->takeName(ToReplace);
+ ToReplace->replaceAllUsesWith(Replacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+ bool MadeChange = false;
+ SmallVector<User *, 2> AllRelocateCalls;
+
+ for (auto *U : I.users())
+ if (isGCRelocate(dyn_cast<Instruction>(U)))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(U);
+
+ // We need at least one base pointer relocation + one derived pointer
+ // relocation to mangle
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
 /// SinkCast - Sink the specified cast instruction into its user blocks
 static bool SinkCast(CastInst *CI) {
 BasicBlock *DefBB = CI->getParent();
@@ -822,23 +1001,211 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
 return MadeChange;
 }
 
-namespace {
-class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-protected:
- void replaceCall(Value *With) override {
- CI->replaceAllUsesWith(With);
- CI->eraseFromParent();
+// ScalarizeMaskedLoad() translates masked load intrinsic, like
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, which loads the elements one by one if
+// the appropriate mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.load, label %else
+//
+//cond.load: ; preds = %0
+// %4 = getelementptr i32* %1, i32 0
+// %5 = load i32* %4
+// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// br label %else
+//
+//else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+// %7 = extractelement <16 x i1> %mask, i32 1
+// %8 = icmp eq i1 %7, true
+// br i1 %8, label %cond.load1, label %else2
+//
+//cond.load1: ; preds = %else
+// %9 = getelementptr i32* %1, i32 1
+// %10 = load i32* %9
+// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// br label %else2
+//
+//else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+// %12 = extractelement <16 x i1> %mask, i32 2
+// %13 = icmp eq i1 %12, true
+// br i1 %13, label %cond.load4, label %else5
+//
+static void ScalarizeMaskedLoad(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Src0 = CI->getArgOperand(3);
+ Value *Mask = CI->getArgOperand(2);
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+ Type *EltTy = VecType->getElementType();
+
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ unsigned VectorWidth = VecType->getNumElements();
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_load = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst* Load = Builder.CreateLoad(Gep, false);
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
 }
- bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
- return SizeCI->isAllOnesValue();
- return false;
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// ScalarizeMaskedStore() translates masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks, which stores the elements one by one if
+// the appropriate mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %4 = extractelement <16 x i32> %val, i32 0
+// %5 = getelementptr i32* %1, i32 0
+// store i32 %4, i32* %5
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %6 = extractelement <16 x i1> %mask, i32 1
+// %7 = icmp eq i1 %6, true
+// br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %8 = extractelement <16 x i32> %val, i32 1
+// %9 = getelementptr i32* %1, i32 1
+// store i32 %8, i32* %9
+// br label %else2
+// . . .
+static void ScalarizeMaskedStore(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Src = CI->getArgOperand(0);
+ Value *Mask = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+ Type *EltTy = VecType->getElementType();
+
+ assert(VecType && "Unexpected data type in masked store intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+
+ unsigned VectorWidth = VecType->getNumElements();
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_store = icmp eq i1 %mask_1, true
+ // br i1 %to_store, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateStore(OneElt, Gep);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
 }
-};
-} // end anonymous namespace
+ CI->eraseFromParent();
+}
 
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
 BasicBlock *BB = CI->getParent();
 
 // Lower inline assembly if we can.
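Before the patch moves on to OptimizeCallInst, it helps to state what the two scalarizers above compute per lane. The following is a reference sketch in ordinary scalar C++, not code from the patch; the pass emits the equivalent control flow in IR as the chains of cond.load/cond.store blocks shown in the comments, one conditional branch per vector lane.

    #include <cstddef>

    // llvm.masked.load semantics: lanes whose mask bit is set are read from
    // memory; all other lanes take the pass-through value and the load for
    // that lane never executes (so it cannot fault).
    template <typename T, std::size_t N>
    void maskedLoadRef(const T (&Ptr)[N], const bool (&Mask)[N],
                       const T (&PassThru)[N], T (&Result)[N]) {
      for (std::size_t I = 0; I != N; ++I)
        Result[I] = Mask[I] ? Ptr[I] : PassThru[I];
    }

    // llvm.masked.store semantics: only lanes whose mask bit is set are
    // written; masked-off lanes leave memory untouched.
    template <typename T, std::size_t N>
    void maskedStoreRef(const T (&Src)[N], T (&Ptr)[N], const bool (&Mask)[N]) {
      for (std::size_t I = 0; I != N; ++I)
        if (Mask[I])
          Ptr[I] = Src[I];
    }

The branch-per-lane expansion preserves exactly this behavior, which is why the load version ends in a select against the pass-through operand and why the store version simply skips the store for masked-off lanes.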
@@ -858,38 +1225,60 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return true; } - // Lower all uses of llvm.objectsize.* IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); - if (II && II->getIntrinsicID() == Intrinsic::objectsize) { - bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); - Type *ReturnTy = CI->getType(); - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - - // Substituting this can cause recursive simplifications, which can - // invalidate our iterator. Use a WeakVH to hold onto it in case this - // happens. - WeakVH IterHandle(CurInstIterator); + if (II) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::objectsize: { + // Lower all uses of llvm.objectsize.* + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + Type *ReturnTy = CI->getType(); + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + + // Substituting this can cause recursive simplifications, which can + // invalidate our iterator. Use a WeakVH to hold onto it in case this + // happens. + WeakVH IterHandle(CurInstIterator); + + replaceAndRecursivelySimplify(CI, RetVal, + TLI ? TLI->getDataLayout() : nullptr, + TLInfo, ModifiedDT ? nullptr : DT); - replaceAndRecursivelySimplify(CI, RetVal, - TLI ? TLI->getDataLayout() : nullptr, - TLInfo, ModifiedDT ? nullptr : DT); - - // If the iterator instruction was recursively deleted, start over at the - // start of the block. - if (IterHandle != CurInstIterator) { - CurInstIterator = BB->begin(); - SunkAddrs.clear(); + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + if (IterHandle != CurInstIterator) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + return true; + } + case Intrinsic::masked_load: { + // Scalarize unsupported vector masked load + if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) { + ScalarizeMaskedLoad(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_store: { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) { + ScalarizeMaskedStore(CI); + ModifiedDT = true; + return true; + } + return false; + } } - return true; - } - if (II && TLI) { - SmallVector<Value*, 2> PtrOps; - Type *AccessTy; - if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) - while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) - return true; + if (TLI) { + SmallVector<Value*, 2> PtrOps; + Type *AccessTy; + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + return true; + } } // From here on out we're working with named functions. @@ -901,10 +1290,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls - // that have the default "don't know" as the objectsize. Anything else - // should be left alone. - CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD, TLInfo); + // to fortified library functions (e.g. __memcpy_chk) that have the default + // "don't know" as the objectsize. Anything else should be left alone. 
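+ // [Editor's sketch, not from the patch] An example of what the simplifier
+ // below lowers: a fortified call whose object-size argument is the
+ // "don't know" sentinel (all-ones), e.g.
+ //   %r = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 %n, i64 -1)
+ // is rewritten to the plain
+ //   %r = call i8* @memcpy(i8* %dst, i8* %src, i64 %n)
+ // Calls that carry a real object size are left alone; the 'true' passed to
+ // the FortifiedLibCallSimplifier constructor below requests exactly this
+ // only-lower-unknown-size behavior.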
+ FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + if (Value *V = Simplifier.optimizeCall(CI)) { + CI->replaceAllUsesWith(V); + CI->eraseFromParent(); + return true; + } + return false; } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -1561,6 +1955,7 @@ void TypePromotionTransaction::rollback( /// This encapsulates the logic for matching the target-legal addressing modes. class AddressingModeMatcher { SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetMachine &TM; const TargetLowering &TLI; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and @@ -1584,13 +1979,15 @@ class AddressingModeMatcher { /// always returns true. bool IgnoreProfitability; - AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, - const TargetLowering &T, Type *AT, - Instruction *MI, ExtAddrMode &AM, - const SetOfInstrs &InsertedTruncs, + AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, + const TargetMachine &TM, Type *AT, Instruction *MI, + ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) - : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM), + : AddrModeInsts(AMI), TM(TM), + TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) + ->getTargetLowering()), + AccessTy(AT), MemoryInst(MI), AddrMode(AM), InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } @@ -1607,13 +2004,13 @@ public: static ExtAddrMode Match(Value *V, Type *AccessTy, Instruction *MemoryInst, SmallVectorImpl<Instruction*> &AddrModeInsts, - const TargetLowering &TLI, + const TargetMachine &TM, const SetOfInstrs &InsertedTruncs, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT) { ExtAddrMode Result; - bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT).MatchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -1718,6 +2115,23 @@ static bool MightBeFoldableInst(Instruction *I) { } } +/// \brief Check whether or not \p Val is a legal instruction for \p TLI. +/// \note \p Val is assumed to be the product of some type promotion. +/// Therefore if \p Val has an undefined state in \p TLI, this is assumed +/// to be legal, as the non-promoted value would have had the same state. +static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { + Instruction *PromotedInst = dyn_cast<Instruction>(Val); + if (!PromotedInst) + return false; + int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); + // If the ISDOpcode is undefined, it was undefined before the promotion. + if (!ISDOpcode) + return true; + // Otherwise, check if the promoted instruction is legal or not. + return TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(PromotedInst->getType())); +} + /// \brief Hepler class to perform type promotion. class TypePromotionHelper { /// \brief Utility function to check whether or not a sign or zero extension @@ -1747,46 +2161,59 @@ class TypePromotionHelper { /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInsts[out] contains how many non-free instructions have been /// created to promote the operand of Ext. + /// Newly added extensions are inserted in \p Exts. + /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. 
/// \return The promoted value which is used instead of Ext. - static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts); + static Value *promoteOperandForTruncAndAnyExt( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs); /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInsts[out] contains how many non-free instructions have been /// created to promote the operand of Ext. + /// Newly added extensions are inserted in \p Exts. + /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. /// \return The promoted value which is used instead of Ext. - static Value *promoteOperandForOther(Instruction *Ext, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, bool IsSExt); + static Value * + promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); /// \see promoteOperandForOther. - static Value *signExtendOperandForOther(Instruction *Ext, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true); + static Value * + signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, + Truncs, true); } /// \see promoteOperandForOther. - static Value *zeroExtendOperandForOther(Instruction *Ext, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false); + static Value * + zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, + Truncs, false); } public: /// Type for the utility function that promotes the operand of Ext. typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts); + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs); /// \brief Given a sign/zero extend instruction \p Ext, return the approriate /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current @@ -1805,6 +2232,12 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, Type *ConsideredExtType, const InstrToOrigTy &PromotedInsts, bool IsSExt) { + // The promotion helper does not know how to deal with vector types yet. + // To be able to fix that, we would need to fix the places where we + // statically extend, e.g., constants and such. 
+ if (Inst->getType()->isVectorTy()) + return false; + // We can always get through zext. if (isa<ZExtInst>(Inst)) return true; @@ -1830,8 +2263,9 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // Check if we can use this operand in the extension. // If the type is larger than the result type of the extension, // we cannot. - if (OpndVal->getType()->getIntegerBitWidth() > - ConsideredExtType->getIntegerBitWidth()) + if (!OpndVal->getType()->isIntegerTy() || + OpndVal->getType()->getIntegerBitWidth() > + ConsideredExtType->getIntegerBitWidth()) return false; // If the operand of the truncate is not an instruction, we will not have @@ -1896,7 +2330,9 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) { + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); @@ -1922,8 +2358,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // Check if the extension is still needed. Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); - if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) + if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { + if (ExtInst && Exts) + Exts->push_back(ExtInst); return ExtVal; + } // At this point we have: ext ty opnd to ty. // Reassign the uses of ExtInst to the opnd and remove ExtInst. @@ -1934,7 +2373,9 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( Value *TypePromotionHelper::promoteOperandForOther( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) { + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); @@ -1949,6 +2390,8 @@ Value *TypePromotionHelper::promoteOperandForOther( ITrunc->removeFromParent(); // Insert it just after the definition. ITrunc->insertAfter(ExtOpnd); + if (Truncs) + Truncs->push_back(ITrunc); } TPT.replaceAllUsesWith(ExtOpnd, Trunc); @@ -2004,12 +2447,17 @@ Value *TypePromotionHelper::promoteOperandForOther( if (!ExtForOpnd) { // If yes, create a new one. DEBUG(dbgs() << "More operands to ext\n"); - ExtForOpnd = - cast<Instruction>(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) - : TPT.createZExt(Ext, Opnd, Ext->getType())); + Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) + : TPT.createZExt(Ext, Opnd, Ext->getType()); + if (!isa<Instruction>(ValForExtOpnd)) { + TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); + continue; + } + ExtForOpnd = cast<Instruction>(ValForExtOpnd); ++CreatedInsts; } - + if (Exts) + Exts->push_back(ExtForOpnd); TPT.setOperand(ExtForOpnd, 0, Opnd); // Move the sign extension before the insertion point. @@ -2047,16 +2495,7 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, // The promotion is neutral but it may help folding the sign extension in // loads for instance. 
// Check that we did not create an illegal instruction. - Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand); - if (!PromotedInst) - return false; - int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); - // If the ISDOpcode is undefined, it was undefined before the promotion. - if (!ISDOpcode) - return true; - // Otherwise, check if the promoted instruction is legal or not. - return TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(PromotedInst->getType())); + return isPromotedInstructionLegal(TLI, PromotedOperand); } /// MatchOperationAddr - Given an instruction or constant expr, see if we can @@ -2250,7 +2689,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); unsigned CreatedInsts = 0; - Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts); + Value *PromotedOperand = + TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. @@ -2374,13 +2814,17 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { /// inline asm call are due to memory operands. If so, return true, otherwise /// return false. static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, - const TargetLowering &TLI) { - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + const TargetMachine &TM) { + const Function *F = CI->getParent()->getParent(); + const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(TRI, ImmutableCallSite(CI)); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! @@ -2396,10 +2840,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, /// FindAllMemoryUses - Recursively walk all the uses of I until we find a /// memory use. If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. -static bool FindAllMemoryUses(Instruction *I, - SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, - SmallPtrSetImpl<Instruction*> &ConsideredInsts, - const TargetLowering &TLI) { +static bool FindAllMemoryUses( + Instruction *I, + SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -2429,12 +2873,12 @@ static bool FindAllMemoryUses(Instruction *I, if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. 
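+ // [Editor's note] The TLI -> TM plumbing above (IsOperandAMemoryOperand,
+ // FindAllMemoryUses) follows the pattern used throughout this commit:
+ // fetch the lowering objects per function rather than per module, so that
+ // per-function subtarget attributes are honored. A minimal sketch, using
+ // only calls that appear in this patch:
+ //   const Function *F = CI->getParent()->getParent();
+ //   const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ //   const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();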
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + if (!IsOperandAMemoryOperand(CI, IA, I, TM)) return true; continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) return true; } @@ -2522,7 +2966,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // uses. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -2547,7 +2991,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode Result; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, MemoryInst, Result, InsertedTruncs, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; @@ -2630,7 +3074,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( - V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet, + V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet, PromotedInsts, TPT); // This check is broken into two cases with very similar code to avoid using @@ -2705,8 +3149,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); - } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && - TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) { + } else if (AddrSinkUsingGEPs || + (!AddrSinkUsingGEPs.getNumOccurrences() && TM && + TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) + ->useAA())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " @@ -2929,8 +3375,10 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(TRI, CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -2949,26 +3397,186 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { return MadeChange; } +/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or +/// sign extensions. 
+static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { + assert(!Inst->use_empty() && "Input must have at least one use"); + const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin()); + bool IsSExt = isa<SExtInst>(FirstUser); + Type *ExtTy = FirstUser->getType(); + for (const User *U : Inst->users()) { + const Instruction *UI = cast<Instruction>(U); + if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) + return false; + Type *CurTy = UI->getType(); + // Same input and output types: Same instruction after CSE. + if (CurTy == ExtTy) + continue; + + // If IsSExt is true, we are in this situation: + // a = Inst + // b = sext ty1 a to ty2 + // c = sext ty1 a to ty3 + // Assuming ty2 is shorter than ty3, this could be turned into: + // a = Inst + // b = sext ty1 a to ty2 + // c = sext ty2 b to ty3 + // However, the last sext is not free. + if (IsSExt) + return false; + + // This is a ZExt, maybe this is free to extend from one type to another. + // In that case, we would not account for a different use. + Type *NarrowTy; + Type *LargeTy; + if (ExtTy->getScalarType()->getIntegerBitWidth() > + CurTy->getScalarType()->getIntegerBitWidth()) { + NarrowTy = CurTy; + LargeTy = ExtTy; + } else { + NarrowTy = ExtTy; + LargeTy = CurTy; + } + + if (!TLI.isZExtFree(NarrowTy, LargeTy)) + return false; + } + // All uses are the same or can be derived from one another for free. + return true; +} + +/// \brief Try to form ExtLd by promoting \p Exts until they reach a +/// load instruction. +/// If an ext(load) can be formed, it is returned via \p LI for the load +/// and \p Inst for the extension. +/// Otherwise LI == nullptr and Inst == nullptr. +/// When some promotion happened, \p TPT contains the proper state to +/// revert them. +/// +/// \return true when promoting was necessary to expose the ext(load) +/// opportunity, false otherwise. +/// +/// Example: +/// \code +/// %ld = load i32* %addr +/// %add = add nuw i32 %ld, 4 +/// %zext = zext i32 %add to i64 +/// \endcode +/// => +/// \code +/// %ld = load i32* %addr +/// %zext = zext i32 %ld to i64 +/// %add = add nuw i64 %zext, 4 +/// \endcode +/// Thanks to the promotion, we can match zext(load i32*) to i64. +bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, + LoadInst *&LI, Instruction *&Inst, + const SmallVectorImpl<Instruction *> &Exts, + unsigned CreatedInsts = 0) { + // Iterate over all the extensions to see if one of them forms an ext(load). + for (auto I : Exts) { + // Check if we directly have ext(load). + if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) { + Inst = I; + // No promotion happened here. + return false; + } + // Check whether or not we want to do any promotion. + if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) + continue; + // Get the action to perform the promotion. + TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( + I, InsertedTruncsSet, *TLI, PromotedInsts); + // Check if we can promote. + if (!TPH) + continue; + // Save the current state. + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + SmallVector<Instruction *, 4> NewExts; + unsigned NewCreatedInsts = 0; + // Promote. + Value *PromotedVal = + TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); + assert(PromotedVal && + "TypePromotionHelper should have filtered out those cases"); + + // We would be able to merge only one extension into a load.
+ // Therefore, if we have more than 1 new extension we heuristically + // cut this search path, because it would degrade the code quality. + // With exactly 2, the transformation is neutral, because we will merge + // one extension but leave one. However, we optimistically keep going, + // because the new extension may be removed too. + unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; + if (!StressExtLdPromotion && + (TotalCreatedInsts > 1 || + !isPromotedInstructionLegal(*TLI, PromotedVal))) { + // The promotion is not profitable, rollback to the previous state. + TPT.rollback(LastKnownGood); + continue; + } + // The promotion is profitable. + // Check if it exposes an ext(load). + (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); + if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || + // If we have created a new extension, i.e., we now have two + // extensions, we must make sure one of them is merged with + // the load, otherwise we may degrade the code quality. + (LI->hasOneUse() || hasSameExtUse(LI, *TLI)))) + // Promotion happened. + return true; + // If this does not help to expose an ext(load), then roll back. + TPT.rollback(LastKnownGood); + } + // None of the extensions can form an ext(load). + LI = nullptr; + Inst = nullptr; + return false; +} + /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same /// basic block as the load, unless conditions are unfavorable. This allows /// SelectionDAG to fold the extend into the load. +/// \p I[in/out] the extension may be modified during the process if some +/// promotions apply. /// -bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { +bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { + // Try to promote a chain of computation if it lets us form + // an extended load. + TypePromotionTransaction TPT; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + SmallVector<Instruction *, 1> Exts; + Exts.push_back(I); // Look for a load being extended. - LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); - if (!LI) return false; + LoadInst *LI = nullptr; + Instruction *OldExt = I; + bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts); + if (!LI || !I) { + assert(!HasPromoted && !LI && "If we did not match any load instruction " + "the code must remain the same"); + I = OldExt; + return false; + } // If they're already in the same block, there's nothing to do. - if (LI->getParent() == I->getParent()) + // Make the cheap checks first if we did not promote. + // If we promoted, we need to check if it is indeed profitable. + if (!HasPromoted && LI->getParent() == I->getParent()) return false; + EVT VT = TLI->getValueType(I->getType()); + EVT LoadVT = TLI->getValueType(LI->getType()); + // If the load has other users and the truncate is not free, this probably // isn't worthwhile. - if (!LI->hasOneUse() && - TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) || - !TLI->isTypeLegal(TLI->getValueType(I->getType()))) && - !TLI->isTruncateFree(I->getType(), LI->getType()) + if (!LI->hasOneUse() && TLI && + (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && + !TLI->isTruncateFree(I->getType(), LI->getType())) { + I = OldExt; + TPT.rollback(LastKnownGood); return false; + } // Check whether the target supports casts folded into loads.
unsigned LType; @@ -2978,11 +3586,15 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { assert(isa<SExtInst>(I) && "Unexpected ext type!"); LType = ISD::SEXTLOAD; } - if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) + if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) { + I = OldExt; + TPT.rollback(LastKnownGood); return false; + } // Move the extend into the same block as the load, so that SelectionDAG // can fold it. + TPT.commit(); I->removeFromParent(); I->insertAfter(LI); ++NumExtsMoved; @@ -3512,7 +4124,8 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { isa<UndefValue>(Val) || canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); } else - assert(0 && "Did you modified shouldPromote and forgot to update this?"); + llvm_unreachable("Did you modify shouldPromote and forget to update " + "this?"); ToBePromoted->setOperand(U.getOperandNo(), NewVal); } Transition->removeFromParent(); @@ -3575,7 +4188,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { return false; } -bool CodeGenPrepare::OptimizeInst(Instruction *I) { +bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (PHINode *P = dyn_cast<PHINode>(I)) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a @@ -3654,14 +4267,14 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); ++NumGEPsElim; - OptimizeInst(NC); + OptimizeInst(NC, ModifiedDT); return true; } return false; } if (CallInst *CI = dyn_cast<CallInst>(I)) - return OptimizeCallInst(CI); + return OptimizeCallInst(CI, ModifiedDT); if (SelectInst *SI = dyn_cast<SelectInst>(I)) return OptimizeSelectInst(SI); @@ -3678,14 +4291,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { +bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; CurInstIterator = BB.begin(); - while (CurInstIterator != BB.end()) - MadeChange |= OptimizeInst(CurInstIterator++); - + while (CurInstIterator != BB.end()) { + MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT); + if (ModifiedDT) + return true; + } MadeChange |= DupRetToEnableTailCallOpts(&BB); return MadeChange; @@ -3696,10 +4311,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { // find a node corresponding to the value. bool CodeGenPrepare::PlaceDbgValues(Function &F) { bool MadeChange = false; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + for (BasicBlock &BB : F) { Instruction *PrevNonDbgInst = nullptr; - for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { - Instruction *Insn = BI; ++BI; + for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { + Instruction *Insn = BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); // Leave dbg.values that refer to an alloca alone. These // instrinsics describe the address of a variable (= the alloca) @@ -3793,3 +4408,233 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) { } return MadeChange; } + +/// \brief Retrieve the probabilities of a conditional branch. Returns true on +/// success, or returns false if no or invalid metadata was found.
+static bool extractBranchMetadata(BranchInst *BI, + uint64_t &ProbTrue, uint64_t &ProbFalse) { + assert(BI->isConditional() && + "Looking for probabilities on unconditional branch?"); + auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) + return false; + + const auto *CITrue = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); + const auto *CIFalse = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); + if (!CITrue || !CIFalse) + return false; + + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); + + return true; +} + +/// \brief Scale down both weights to fit into uint32_t. +static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + +/// \brief Some targets prefer to split a conditional branch like: +/// \code +/// %0 = icmp ne i32 %a, 0 +/// %1 = icmp ne i32 %b, 0 +/// %or.cond = or i1 %0, %1 +/// br i1 %or.cond, label %TrueBB, label %FalseBB +/// \endcode +/// into multiple branch instructions like: +/// \code +/// bb1: +/// %0 = icmp ne i32 %a, 0 +/// br i1 %0, label %TrueBB, label %bb2 +/// bb2: +/// %1 = icmp ne i32 %b, 0 +/// br i1 %1, label %TrueBB, label %FalseBB +/// \endcode +/// This usually allows instruction selection to do even further optimizations +/// and combine the compare with the branch instruction. Currently this is +/// applied for targets which have "cheap" jump instructions. +/// +/// FIXME: Remove the (equivalent?) implementation in SelectionDAG. +/// +bool CodeGenPrepare::splitBranchCondition(Function &F) { + if (!TM || TM->Options.EnableFastISel != true || + !TLI || TLI->isJumpExpensive()) + return false; + + bool MadeChange = false; + for (auto &BB : F) { + // Does this BB end with the following? + // %cond1 = icmp|fcmp|binary instruction ... + // %cond2 = icmp|fcmp|binary instruction ... + // %cond.or = or|and i1 %cond1, %cond2 + // br i1 %cond.or, label %dest1, label %dest2 + BinaryOperator *LogicOp; + BasicBlock *TBB, *FBB; + if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) + continue; + + unsigned Opc; + Value *Cond1, *Cond2; + if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), + m_OneUse(m_Value(Cond2))))) + Opc = Instruction::And; + else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)), + m_OneUse(m_Value(Cond2))))) + Opc = Instruction::Or; + else + continue; + + if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) || + !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))) + continue; + + DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); + + // Create a new BB. + auto *InsertBefore = std::next(Function::iterator(BB)) + .getNodePtrUnchecked(); + auto TmpBB = BasicBlock::Create(BB.getContext(), + BB.getName() + ".cond.split", + BB.getParent(), InsertBefore); + + // Update the original basic block to use the first condition directly in + // its branch instruction, removing the no-longer-needed and/or instruction. + auto *Br1 = cast<BranchInst>(BB.getTerminator()); + Br1->setCondition(Cond1); + LogicOp->eraseFromParent(); + + // Depending on the condition we have to either replace the true or the + // false successor of the original branch instruction. + if (Opc == Instruction::And) + Br1->setSuccessor(0, TmpBB); + else + Br1->setSuccessor(1, TmpBB); + + // Fill in the new basic block.
+ auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); + if (auto *I = dyn_cast<Instruction>(Cond2)) { + I->removeFromParent(); + I->insertBefore(Br2); + } + + // Update PHI nodes in both successors. The original BB needs to be + // replaced in one successor's PHI nodes, because the branch now comes from + // the newly generated BB (TmpBB). In the other successor we need to add one + // incoming edge to the PHI nodes, because both branch instructions now + // target the same successor. Depending on the original branch condition + // (and/or) we have to swap the successors (TrueDest, FalseDest), so that + // we perform the correct update for the PHI nodes. + // This doesn't change the successor order of the just created branch + // instruction (or any other instruction). + if (Opc == Instruction::Or) + std::swap(TBB, FBB); + + // Replace the old BB with the new BB. + for (auto &I : *TBB) { + PHINode *PN = dyn_cast<PHINode>(&I); + if (!PN) + break; + int i; + while ((i = PN->getBasicBlockIndex(&BB)) >= 0) + PN->setIncomingBlock(i, TmpBB); + } + + // Add another incoming edge from the new BB. + for (auto &I : *FBB) { + PHINode *PN = dyn_cast<PHINode>(&I); + if (!PN) + break; + auto *Val = PN->getIncomingValueForBlock(&BB); + PN->addIncoming(Val, TmpBB); + } + + // Update the branch weights (from SelectionDAGBuilder:: + // FindMergedConditions). + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // BB1: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's + // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice + // assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals TrueProb for + // TmpBB, but the math is more complicated. + uint64_t TrueWeight, FalseWeight; + if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) { + uint64_t NewTrueWeight = TrueWeight; + uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; + scaleWeights(NewTrueWeight, NewFalseWeight); + Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) + .createBranchWeights(NewTrueWeight, NewFalseWeight)); + + NewTrueWeight = TrueWeight; + NewFalseWeight = 2 * FalseWeight; + scaleWeights(NewTrueWeight, NewFalseWeight); + Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) + .createBranchWeights(NewTrueWeight, NewFalseWeight)); + } + } else { + // Codegen X & Y as: + // BB1: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original weights are A and B, one choice is to set BB1's + // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice + // assumes that + // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
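+ // [Editor's worked example, not in the patch] With incoming !prof weights
+ // A = 48 (true) and B = 16 (false): the or-lowering above gives Br1 the
+ // weights (A, A + 2B) = (48, 80) and Br2 the weights (A, 2B) = (48, 32);
+ // the and-lowering handled below gives Br1 (2A + B, B) = (112, 16) and
+ // Br2 (2A, B) = (96, 16). scaleWeights() then divides both sides by
+ // (max / UINT32_MAX) + 1, which is 1 (a no-op) until a weight overflows
+ // uint32_t.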
+ uint64_t TrueWeight, FalseWeight; + if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) { + uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; + uint64_t NewFalseWeight = FalseWeight; + scaleWeights(NewTrueWeight, NewFalseWeight); + Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) + .createBranchWeights(NewTrueWeight, NewFalseWeight)); + + NewTrueWeight = 2 * TrueWeight; + NewFalseWeight = FalseWeight; + scaleWeights(NewTrueWeight, NewFalseWeight); + Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) + .createBranchWeights(NewTrueWeight, NewFalseWeight)); + } + } + + // Request DOM Tree update. + // Note: No point in getting fancy here, since the DT info is never + // available to CodeGenPrepare and the existing update code is broken + // anyway. + ModifiedDT = true; + + MadeChange = true; + + DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); + TmpBB->dump()); + } + return MadeChange; +} diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 48213c1..c17a35d 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -59,6 +59,10 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (MI->isInlineAsm()) return false; + // Don't delete frame allocation labels. + if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) + return false; + // Don't delete instructions with side effects. bool SawStore = false; if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI()) diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 75b74d9..7b47a48 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -14,18 +14,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" @@ -44,10 +38,13 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. + + // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in + // practice.
+ DwarfEHPrepare() : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr) {} + DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr) { - initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); - } + : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -56,8 +53,6 @@ namespace { return false; } - void getAnalysisUsage(AnalysisUsage &AU) const override { } - const char *getPassName() const override { return "Exception handling preparation"; } @@ -65,6 +60,8 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; +INITIALIZE_TM_PASS(DwarfEHPrepare, "dwarfehprepare", "Prepare DWARF exceptions", + false, false) FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { return new DwarfEHPrepare(TM); @@ -99,11 +96,11 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { RI->eraseFromParent(); if (EraseIVIs) { - if (SelIVI->getNumUses() == 0) + if (SelIVI->use_empty()) SelIVI->eraseFromParent(); - if (ExcIVI->getNumUses() == 0) + if (ExcIVI->use_empty()) ExcIVI->eraseFromParent(); - if (SelLoad && SelLoad->getNumUses() == 0) + if (SelLoad && SelLoad->use_empty()) SelLoad->eraseFromParent(); } @@ -114,9 +111,8 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { SmallVector<ResumeInst*, 16> Resumes; - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (ResumeInst *RI = dyn_cast<ResumeInst>(TI)) + for (BasicBlock &BB : Fn) { + if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); } @@ -124,9 +120,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { return false; // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); + LLVMContext &Ctx = Fn.getContext(); if (!RewindFunction) { - LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); @@ -134,7 +130,6 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } // Create the basic block where the _Unwind_Resume call will live. - LLVMContext &Ctx = Fn.getContext(); unsigned ResumesSize = Resumes.size(); if (ResumesSize == 1) { @@ -159,9 +154,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. 
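+ // [Editor's sketch, not in the patch] For several resumes, the loop below
+ // funnels every resume block into one shared block; the block and value
+ // names here are illustrative, assuming the usual Itanium runtime entry
+ // point returned by getLibcallName(RTLIB::UNWIND_RESUME):
+ //   unwind_resume:                          ; preds = each former resume
+ //     %exn = phi i8* [ %exn0, %bb0 ], [ %exn1, %bb1 ], ...
+ //     call void @_Unwind_Resume(i8* %exn)   ; noreturn
+ //     unreachable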
- for (SmallVectorImpl<ResumeInst*>::iterator - I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { - ResumeInst *RI = *I; + for (ResumeInst *RI : Resumes) { BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); @@ -181,6 +174,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } bool DwarfEHPrepare::runOnFunction(Function &Fn) { + assert(TM && "DWARF EH preparation requires a target machine"); bool Changed = InsertUnwindResumeCalls(Fn); return Changed; } diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 995606f..8f74271 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -777,15 +777,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); // Only run if conversion if the target wants it. - if (!MF.getTarget() - .getSubtarget<TargetSubtargetInfo>() - .enableEarlyIfConversion()) + const TargetSubtargetInfo &STI = MF.getSubtarget(); + if (!STI.enableEarlyIfConversion()) return false; - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - SchedModel = - MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp index 85b0893..024946d 100644 --- a/lib/CodeGen/ErlangGC.cpp +++ b/lib/CodeGen/ErlangGC.cpp @@ -27,56 +27,20 @@ using namespace llvm; namespace { - class ErlangGC : public GCStrategy { - MCSymbol *InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const; - public: - ErlangGC(); - bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override; - }; - +class ErlangGC : public GCStrategy { +public: + ErlangGC(); +}; } -static GCRegistry::Add<ErlangGC> -X("erlang", "erlang-compatible garbage collector"); +static GCRegistry::Add<ErlangGC> X("erlang", + "erlang-compatible garbage collector"); -void llvm::linkErlangGC() { } +void llvm::linkErlangGC() {} ErlangGC::ErlangGC() { InitRoots = false; NeededSafePoints = 1 << GC::PostCall; UsesMetadata = true; CustomRoots = false; - CustomSafePoints = true; -} - -MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { - const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); - return Label; -} - -bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; - ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); - MI != ME; ++MI) - - if (MI->getDesc().isCall()) { - - // Do not treat tail call sites as safe points. - if (MI->getDesc().isTerminator()) - continue; - - /* Code copied from VisitCallPoint(...) 
*/ - MachineBasicBlock::iterator RAI = MI; ++RAI; - MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc()); - FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc()); - } - - return false; } diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 3680498..b3a22c8 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -74,6 +75,9 @@ struct DomainValue { // Is domain available? bool hasDomain(unsigned domain) const { + assert(domain < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); return AvailableDomains & (1u << domain); } @@ -133,7 +137,7 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - std::vector<int> AliasMap; + std::vector<SmallVector<int, 1>> AliasMap; const unsigned NumRegs; LiveReg *LiveRegs; typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; @@ -169,8 +173,8 @@ public: } private: - // Register mapping. - int regIndex(unsigned Reg); + iterator_range<SmallVectorImpl<int>::const_iterator> + regIndizes(unsigned Reg) const; // DomainValue allocation. DomainValue *alloc(int domain = -1); @@ -201,11 +205,13 @@ private: char ExeDepsFix::ID = 0; -/// Translate TRI register number to an index into our smaller tables of -/// interesting registers. Return -1 for boring registers. -int ExeDepsFix::regIndex(unsigned Reg) { +/// Translate TRI register number to a list of indices into our smaller tables +/// of interesting registers. +iterator_range<SmallVectorImpl<int>::const_iterator> +ExeDepsFix::regIndizes(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); - return AliasMap[Reg]; + const auto &Entry = AliasMap[Reg]; + return make_range(Entry.begin(), Entry.end()); } DomainValue *ExeDepsFix::alloc(int domain) { @@ -338,9 +344,11 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { // All uses of B are referred to A. B->Next = retain(A); - for (unsigned rx = 0; rx != NumRegs; ++rx) + for (unsigned rx = 0; rx != NumRegs; ++rx) { + assert(LiveRegs && "no space allocated for live registers"); if (LiveRegs[rx].Value == B) setLiveReg(rx, A); + } return true; } @@ -370,13 +378,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { if (MBB->pred_empty()) { for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = regIndex(*i); - if (rx < 0) - continue; - // Treat function live-ins as if they were defined just before the first - // instruction. Usually, function arguments are set up immediately - // before the call. - LiveRegs[rx].Def = -1; + for (int rx : regIndizes(*i)) { + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } } DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); return; } @@ -467,26 +474,26 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { /// or undef use.
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { - int rx = regIndex(MI->getOperand(OpIdx).getReg()); - if (rx < 0) - return false; - - unsigned Clearance = CurInstr - LiveRegs[rx].Def; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + unsigned reg = MI->getOperand(OpIdx).getReg(); + for (int rx : regIndizes(reg)) { + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - return true; - } - // The current clearance seems OK, but we may be ignoring a def from a - // back-edge. - if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { - DEBUG(dbgs() << ": OK .\n"); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + continue; + } + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK .\n"); + return false; + } + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); return false; } - // A def from an unprocessed back-edge may make us break this dependency. - DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); - return false; + return true; } // Update def-ages for registers defined by MI. @@ -514,26 +521,24 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { break; if (MO.isUse()) continue; - int rx = regIndex(MO.getReg()); - if (rx < 0) - continue; - - // This instruction explicitly defines rx. - DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr - << '\t' << *MI); - - // Check clearance before partial register updates. - // Call breakDependence before setting LiveRegs[rx].Def. - unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); - if (Pref && shouldBreakDependence(MI, i, Pref)) - TII->breakPartialRegDependency(MI, i, TRI); - - // How many instructions since rx was last written? - LiveRegs[rx].Def = CurInstr; - - // Kill off domains redefined by generic instructions. - if (Kill) - kill(rx); + for (int rx : regIndizes(MO.getReg())) { + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(MI, i, TRI); + + // How many instructions since rx was last written? + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + } } ++CurInstr; } @@ -582,19 +587,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - force(rx, domain); + for (int rx : regIndizes(mo.getReg())) { + force(rx, domain); + } } // Kill all defs and force them. 
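+ // [Editor's note] Same conversion as in shouldBreakDependence above: each
+ // old "int rx = regIndex(Reg); if (rx < 0) continue;" pair becomes a
+ // range-for over the alias list, e.g.
+ //   for (int rx : regIndizes(MO.getReg())) { ... }
+ // which yields no indices for untracked registers and may yield several
+ // when a register aliases more than one entry of the tracked register
+ // class, matching the new std::vector<SmallVector<int, 1>> AliasMap.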
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - kill(rx); - force(rx, domain); + for (int rx : regIndizes(mo.getReg())) { + kill(rx); + force(rx, domain); + } } } @@ -611,9 +616,10 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx].Value) { + for (int rx : regIndizes(mo.getReg())) { + DomainValue *dv = LiveRegs[rx].Value; + if (dv == nullptr) + continue; // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -645,6 +651,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { SmallVector<LiveReg, 4> Regs; for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; + assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. if (!LR.Value->getCommonDomains(available)) { @@ -684,9 +691,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; // If latest didn't merge, it is useless now. Kill all registers using it. - for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) - if (LiveRegs[*i].Value == Latest) - kill(*i); + for (int i : used) { + assert(LiveRegs && "no space allocated for live registers"); + if (LiveRegs[i].Value == Latest) + kill(i); + } } // dv is the DomainValue we are going to use for this instruction. @@ -703,11 +712,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { - kill(rx); - setLiveReg(rx, dv); + for (int rx : regIndizes(mo.getReg())) { + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { + kill(rx); + setLiveReg(rx, dv); + } } } } @@ -735,13 +744,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // Initialize the AliasMap on the first use. if (AliasMap.empty()) { - // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, - // or -1. - AliasMap.resize(TRI->getNumRegs(), -1); + // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and + // therefore the LiveRegs array. 
+ AliasMap.resize(TRI->getNumRegs()); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); ++AI) - AliasMap[*AI] = i; + AliasMap[*AI].push_back(i); } MachineBasicBlock *Entry = MF->begin(); diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp index 5e7e853..63c3699 100644 --- a/lib/CodeGen/ForwardControlFlowIntegrity.cpp +++ b/lib/CodeGen/ForwardControlFlowIntegrity.cpp @@ -25,9 +25,9 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index ed40982..a2c5fce 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -24,22 +24,20 @@ using namespace llvm; namespace { - - class Printer : public FunctionPass { - static char ID; - raw_ostream &OS; - - public: - explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} +class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; - const char *getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; +public: + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - bool runOnFunction(Function &F) override; - bool doFinalization(Module &M) override; - }; + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; +}; } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", @@ -48,7 +46,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata", // ----------------------------------------------------------------------------- GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) - : F(F), S(S), FrameSize(~0LL) {} + : F(F), S(S), FrameSize(~0LL) {} GCFunctionInfo::~GCFunctionInfo() {} @@ -56,51 +54,29 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; -GCModuleInfo::GCModuleInfo() - : ImmutablePass(ID) { +GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); } -GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, - const std::string &Name) { - strategy_map_type::iterator NMI = StrategyMap.find(Name); - if (NMI != StrategyMap.end()) - return NMI->getValue(); - - for (GCRegistry::iterator I = GCRegistry::begin(), - E = GCRegistry::end(); I != E; ++I) { - if (Name == I->getName()) { - std::unique_ptr<GCStrategy> S = I->instantiate(); - S->M = M; - S->Name = Name; - StrategyMap[Name] = S.get(); - StrategyList.push_back(std::move(S)); - return StrategyList.back().get(); - } - } - - dbgs() << "unsupported GC: " << Name << "\n"; - llvm_unreachable(nullptr); -} - GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); assert(F.hasGC()); - + finfo_map_type::iterator I = FInfoMap.find(&F); if (I != FInfoMap.end()) return *I->second; - - GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); - GCFunctionInfo *GFI = S->insertFunctionInfo(F); + + GCStrategy *S = getGCStrategy(F.getGC()); + Functions.push_back(make_unique<GCFunctionInfo>(F, *S)); + GCFunctionInfo *GFI = 
Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; } void GCModuleInfo::clear() { + Functions.clear(); FInfoMap.clear(); - StrategyMap.clear(); - StrategyList.clear(); + GCStrategyList.clear(); } // ----------------------------------------------------------------------------- @@ -111,7 +87,6 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } - const char *Printer::getPassName() const { return "Print Garbage Collector Information"; } @@ -124,42 +99,49 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - case GC::Loop: return "loop"; - case GC::Return: return "return"; - case GC::PreCall: return "pre-call"; - case GC::PostCall: return "post-call"; + case GC::Loop: + return "loop"; + case GC::Return: + return "return"; + case GC::PreCall: + return "pre-call"; + case GC::PostCall: + return "post-call"; } llvm_unreachable("Invalid point kind"); } bool Printer::runOnFunction(Function &F) { - if (F.hasGC()) return false; - + if (F.hasGC()) + return false; + GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), - RE = FD->roots_end(); RI != RE; ++RI) + RE = FD->roots_end(); + RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; - for (GCFunctionInfo::iterator PI = FD->begin(), - PE = FD->end(); PI != PE; ++PI) { - - OS << "\t" << PI->Label->getName() << ": " - << DescKind(PI->Kind) << ", live = {"; - + for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; + ++PI) { + + OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind) + << ", live = {"; + for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI), - RE = FD->live_end(PI);;) { + RE = FD->live_end(PI); + ;) { OS << " " << RI->Num; if (++RI == RE) break; OS << ","; } - + OS << " }\n"; } - + return false; } @@ -169,3 +151,23 @@ bool Printer::doFinalization(Module &M) { GMI->clear(); return false; } + + +GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { + // TODO: Arguably, just doing a linear search would be faster for small N + auto NMI = GCStrategyMap.find(Name); + if (NMI != GCStrategyMap.end()) + return NMI->getValue(); + + for (auto& Entry : GCRegistry::entries()) { + if (Name == Entry.getName()) { + std::unique_ptr<GCStrategy> S = Entry.instantiate(); + S->Name = Name; + GCStrategyMap[Name] = S.get(); + GCStrategyList.push_back(std::move(S)); + return GCStrategyList.back().get(); + } + } + + report_fatal_error(std::string("unsupported GC: ") + Name); +} diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index f80e9ce..bb8cfa1 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -14,14 +14,6 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" using namespace llvm; -GCMetadataPrinter::GCMetadataPrinter() { } +GCMetadataPrinter::GCMetadataPrinter() {} -GCMetadataPrinter::~GCMetadataPrinter() { } - -void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) { - // Default is no action. -} - -void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - // Default is no action. 
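// The name lookup in getGCStrategy above is fed by static registration
// against GCRegistry; a hedged sketch of the registration side (MyGC and the
// "mygc" name are illustrative, not part of this patch):
//
//   namespace {
//   class MyGC : public GCStrategy {
//   public:
//     MyGC() { InitRoots = true; } // request null-initialization of roots
//   };
//   }
//   static GCRegistry::Add<MyGC> X("mygc", "example collector");
//
// A function marked with F.setGC("mygc") then receives this strategy back
// through getFunctionInfo above.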
-} +GCMetadataPrinter::~GCMetadataPrinter() {} diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp new file mode 100644 index 0000000..9d38e4c --- /dev/null +++ b/lib/CodeGen/GCRootLowering.cpp @@ -0,0 +1,351 @@ +//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering for the gc.root mechanism. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or +/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as +/// directed by the GCStrategy. It also performs automatic root initialization +/// and custom intrinsic lowering. +class LowerIntrinsics : public FunctionPass { + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + +public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; +}; + +/// GCMachineCodeAnalysis - This is a target-independent pass over the machine +/// function representation to identify safe points for the garbage collector +/// in the machine code. It inserts labels at safe points and populates a +/// GCMetadata record for each function. 
+class GCMachineCodeAnalysis : public MachineFunctionPass { + GCFunctionInfo *FI; + MachineModuleInfo *MMI; + const TargetInstrInfo *TII; + + void FindSafePoints(MachineFunction &MF); + void VisitCallPoint(MachineBasicBlock::iterator MI); + MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL) const; + + void FindStackOffsets(MachineFunction &MF); + +public: + static char ID; + + GCMachineCodeAnalysis(); + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} + +// ----------------------------------------------------------------------------- + +INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false, + false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) + +FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } + +char LowerIntrinsics::ID = 0; + +LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) { + initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); +} + +const char *LowerIntrinsics::getPassName() const { + return "Lower Garbage Collection Instructions"; +} + +void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<GCModuleInfo>(); + AU.addPreserved<DominatorTreeWrapperPass>(); +} + +static bool NeedsDefaultLoweringPass(const GCStrategy &C) { + // Default lowering is necessary only if read or write barriers have a default + // action. The default for roots is no action. + return !C.customWriteBarrier() || !C.customReadBarrier() || + C.initializeRoots(); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. +bool LowerIntrinsics::doInitialization(Module &M) { + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && I->hasGC()) + MI->getFunctionInfo(*I); // Instantiate the GC strategy. + + return false; +} + +/// CouldBecomeSafePoint - Predicate to conservatively determine whether the +/// instruction could introduce a safe point. +static bool CouldBecomeSafePoint(Instruction *I) { + // The natural definition of instructions which could introduce safe points + // are: + // + // - call, invoke (AfterCall, BeforeCall) + // - phis (Loops) + // - invoke, ret, unwind (Exit) + // + // However, instructions as seemingly innocuous as arithmetic can become + // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead + // it is necessary to take a conservative approach. + + if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) || + isa<LoadInst>(I)) + return false; + + // llvm.gcroot is safe because it doesn't do anything at runtime. + if (CallInst *CI = dyn_cast<CallInst>(I)) + if (Function *F = CI->getCalledFunction()) + if (unsigned IID = F->getIntrinsicID()) + if (IID == Intrinsic::gcroot) + return false; + + return true; +} + +static bool InsertRootInitializers(Function &F, AllocaInst **Roots, + unsigned Count) { + // Scroll past alloca instructions. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + while (isa<AllocaInst>(IP)) + ++IP; + + // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst *, 16> InitedRoots; + for (; !CouldBecomeSafePoint(IP); ++IP) + if (StoreInst *SI = dyn_cast<StoreInst>(IP)) + if (AllocaInst *AI = + dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. + bool MadeChange = false; + + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) + if (!InitedRoots.count(*I)) { + StoreInst *SI = new StoreInst( + ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); + MadeChange = true; + } + + return MadeChange; +} + +/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. +/// Leave gcroot intrinsics; the code generator needs to see those. +bool LowerIntrinsics::runOnFunction(Function &F) { + // Quick exit for functions that do not use GC. + if (!F.hasGC()) + return false; + + GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); + GCStrategy &S = FI.getStrategy(); + + bool MadeChange = false; + + if (NeedsDefaultLoweringPass(S)) + MadeChange |= PerformDefaultLowering(F, S); + + return MadeChange; +} + +bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { + bool LowerWr = !S.customWriteBarrier(); + bool LowerRd = !S.customReadBarrier(); + bool InitRoots = S.initializeRoots(); + + SmallVector<AllocaInst *, 32> Roots; + + bool MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + if (LowerWr) { + // Replace a write barrier with a simple store. + Value *St = + new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcread: + if (LowerRd) { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. 
+ Roots.push_back( + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; + +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) + +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} + +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); + FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); + } + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); + } +} + +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + if (MI->isCall()) { + // Do not treat tail or sibling call sites as safe points. This is + // legal since any arguments passed to the callee which live in the + // remnants of the callers frame will be owned and updated by the + // callee if required. + if (MI->isTerminator()) + continue; + VisitCallPoint(MI); + } +} + +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + assert(TFI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. + if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + ++RI; + } + } +} + +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + if (!FI->getStrategy().needsSafePoints()) + return false; + + MMI = &getAnalysis<MachineModuleInfo>(); + TII = MF.getSubtarget().getInstrInfo(); + + // Find the size of the stack frame. + FI->setFrameSize(MF.getFrameInfo()->getStackSize()); + + // Find all safe points. + FindSafePoints(MF); + + // Find the stack offsets for all roots. 
+ FindStackOffsets(MF); + + return false; +} diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index b346657..554d326 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -1,4 +1,4 @@ -//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===// +//===-- GCStrategy.cpp - Garbage Collector Description --------------------===// // // The LLVM Compiler Infrastructure // @@ -7,417 +7,16 @@ // //===----------------------------------------------------------------------===// // -// This file implements target- and collector-independent garbage collection -// infrastructure. -// -// GCMachineCodeAnalysis identifies the GC safe points in the machine code. -// Roots are identified in SelectionDAGISel. +// This file implements the policy object GCStrategy which describes the +// behavior of a given garbage collector. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -namespace { - - /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or - /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as - /// directed by the GCStrategy. It also performs automatic root initialization - /// and custom intrinsic lowering. - class LowerIntrinsics : public FunctionPass { - static bool NeedsDefaultLoweringPass(const GCStrategy &C); - static bool NeedsCustomLoweringPass(const GCStrategy &C); - static bool CouldBecomeSafePoint(Instruction *I); - bool PerformDefaultLowering(Function &F, GCStrategy &Coll); - static bool InsertRootInitializers(Function &F, - AllocaInst **Roots, unsigned Count); - - public: - static char ID; - - LowerIntrinsics(); - const char *getPassName() const override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - }; - - - /// GCMachineCodeAnalysis - This is a target-independent pass over the machine - /// function representation to identify safe points for the garbage collector - /// in the machine code. It inserts labels at safe points and populates a - /// GCMetadata record for each function. 
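// Note for readers of this hunk: the LowerIntrinsics and GCMachineCodeAnalysis
// passes being deleted here are the same passes added above in the new
// GCRootLowering.cpp, minus the custom-lowering and custom safe point hooks;
// GCStrategy.cpp keeps only the policy object, as its new header comment says.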
- class GCMachineCodeAnalysis : public MachineFunctionPass { - const TargetMachine *TM; - GCFunctionInfo *FI; - MachineModuleInfo *MMI; - const TargetInstrInfo *TII; - - void FindSafePoints(MachineFunction &MF); - void VisitCallPoint(MachineBasicBlock::iterator MI); - MCSymbol *InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const; - - void FindStackOffsets(MachineFunction &MF); - - public: - static char ID; - - GCMachineCodeAnalysis(); - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnMachineFunction(MachineFunction &MF) override; - }; - -} - -// ----------------------------------------------------------------------------- - -GCStrategy::GCStrategy() : - NeededSafePoints(0), - CustomReadBarriers(false), - CustomWriteBarriers(false), - CustomRoots(false), - CustomSafePoints(false), - InitRoots(true), - UsesMetadata(false) -{} - -bool GCStrategy::initializeCustomLowering(Module &M) { return false; } - -bool GCStrategy::performCustomLowering(Function &F) { - dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; - llvm_unreachable("must override performCustomLowering"); -} - - -bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { - dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; - llvm_unreachable(nullptr); -} - - -GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { - Functions.push_back(make_unique<GCFunctionInfo>(F, *this)); - return Functions.back().get(); -} - -// ----------------------------------------------------------------------------- - -INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", - false, false) -INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) -INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) - -FunctionPass *llvm::createGCLoweringPass() { - return new LowerIntrinsics(); -} - -char LowerIntrinsics::ID = 0; - -LowerIntrinsics::LowerIntrinsics() - : FunctionPass(ID) { - initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); - } - -const char *LowerIntrinsics::getPassName() const { - return "Lower Garbage Collection Instructions"; -} - -void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { - FunctionPass::getAnalysisUsage(AU); - AU.addRequired<GCModuleInfo>(); - AU.addPreserved<DominatorTreeWrapperPass>(); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. -bool LowerIntrinsics::doInitialization(Module &M) { - // FIXME: This is rather antisocial in the context of a JIT since it performs - // work against the entire module. But this cannot be done at - // runFunction time (initializeCustomLowering likely needs to change - // the module). - GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); - assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration() && I->hasGC()) - MI->getFunctionInfo(*I); // Instantiate the GC strategy. - - bool MadeChange = false; - for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) - if (NeedsCustomLoweringPass(**I)) - if ((*I)->initializeCustomLowering(M)) - MadeChange = true; - - return MadeChange; -} - -bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, - unsigned Count) { - // Scroll past alloca instructions. - BasicBlock::iterator IP = F.getEntryBlock().begin(); - while (isa<AllocaInst>(IP)) ++IP; - - // Search for initializers in the initial BB. 
- SmallPtrSet<AllocaInst*,16> InitedRoots; - for (; !CouldBecomeSafePoint(IP); ++IP) - if (StoreInst *SI = dyn_cast<StoreInst>(IP)) - if (AllocaInst *AI = - dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) - InitedRoots.insert(AI); - - // Add root initializers. - bool MadeChange = false; - - for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) - if (!InitedRoots.count(*I)) { - StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), - *I); - SI->insertAfter(*I); - MadeChange = true; - } - - return MadeChange; -} - -bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) { - // Default lowering is necessary only if read or write barriers have a default - // action. The default for roots is no action. - return !C.customWriteBarrier() - || !C.customReadBarrier() - || C.initializeRoots(); -} - -bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { - // Custom lowering is only necessary if enabled for some action. - return C.customWriteBarrier() - || C.customReadBarrier() - || C.customRoots(); -} - -/// CouldBecomeSafePoint - Predicate to conservatively determine whether the -/// instruction could introduce a safe point. -bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { - // The natural definition of instructions which could introduce safe points - // are: - // - // - call, invoke (AfterCall, BeforeCall) - // - phis (Loops) - // - invoke, ret, unwind (Exit) - // - // However, instructions as seemingly inoccuous as arithmetic can become - // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead - // it is necessary to take a conservative approach. - - if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || - isa<StoreInst>(I) || isa<LoadInst>(I)) - return false; - - // llvm.gcroot is safe because it doesn't do anything at runtime. - if (CallInst *CI = dyn_cast<CallInst>(I)) - if (Function *F = CI->getCalledFunction()) - if (unsigned IID = F->getIntrinsicID()) - if (IID == Intrinsic::gcroot) - return false; - - return true; -} - -/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. -/// Leave gcroot intrinsics; the code generator needs to see those. -bool LowerIntrinsics::runOnFunction(Function &F) { - // Quick exit for functions that do not use GC. - if (!F.hasGC()) - return false; - - GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); - GCStrategy &S = FI.getStrategy(); - - bool MadeChange = false; - - if (NeedsDefaultLoweringPass(S)) - MadeChange |= PerformDefaultLowering(F, S); - - bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); - if (UseCustomLoweringPass) - MadeChange |= S.performCustomLowering(F); - - // Custom lowering may modify the CFG, so dominators must be recomputed. 
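// The custom-lowering path (NeedsCustomLoweringPass, performCustomLowering,
// and this dominator-tree recalculation) is deleted without a replacement:
// the rewritten LowerIntrinsics above performs only the default
// gcread/gcwrite/gcroot lowering, which never changes the CFG.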
- if (UseCustomLoweringPass) { - if (DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DTWP->getDomTree().recalculate(F); - } - - return MadeChange; -} - -bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { - bool LowerWr = !S.customWriteBarrier(); - bool LowerRd = !S.customReadBarrier(); - bool InitRoots = S.initializeRoots(); - - SmallVector<AllocaInst*, 32> Roots; - - bool MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { - Function *F = CI->getCalledFunction(); - switch (F->getIntrinsicID()) { - case Intrinsic::gcwrite: - if (LowerWr) { - // Replace a write barrier with a simple store. - Value *St = new StoreInst(CI->getArgOperand(0), - CI->getArgOperand(2), CI); - CI->replaceAllUsesWith(St); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcread: - if (LowerRd) { - // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); - Ld->takeName(CI); - CI->replaceAllUsesWith(Ld); - CI->eraseFromParent(); - } - break; - case Intrinsic::gcroot: - if (InitRoots) { - // Initialize the GC root, but do not delete the intrinsic. The - // backend needs the intrinsic to flag the stack slot. - Roots.push_back(cast<AllocaInst>( - CI->getArgOperand(0)->stripPointerCasts())); - } - break; - default: - continue; - } - - MadeChange = true; - } - } - } - - if (Roots.size()) - MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); - - return MadeChange; -} - -// ----------------------------------------------------------------------------- - -char GCMachineCodeAnalysis::ID = 0; -char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; - -INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", - "Analyze Machine Code For Garbage Collection", false, false) - -GCMachineCodeAnalysis::GCMachineCodeAnalysis() - : MachineFunctionPass(ID) {} - -void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - MachineFunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); - AU.addRequired<GCModuleInfo>(); -} - -MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { - MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); - return Label; -} - -void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { - // Find the return address (next instruction), too, so as to bracket the call - // instruction. 
- MachineBasicBlock::iterator RAI = CI; - ++RAI; - - if (FI->getStrategy().needsSafePoint(GC::PreCall)) { - MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); - FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); - } - - if (FI->getStrategy().needsSafePoint(GC::PostCall)) { - MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); - FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); - } -} - -void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { - for (MachineFunction::iterator BBI = MF.begin(), - BBE = MF.end(); BBI != BBE; ++BBI) - for (MachineBasicBlock::iterator MI = BBI->begin(), - ME = BBI->end(); MI != ME; ++MI) - if (MI->isCall()) - VisitCallPoint(MI); -} - -void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering(); - assert(TFI && "TargetRegisterInfo not available!"); - - for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); - RI != FI->roots_end();) { - // If the root references a dead object, no need to keep it. - if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { - RI = FI->removeStackRoot(RI); - } else { - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); - ++RI; - } - } -} - -bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { - // Quick exit for functions that do not use GC. - if (!MF.getFunction()->hasGC()) - return false; - - FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); - if (!FI->getStrategy().needsSafePoints()) - return false; - - TM = &MF.getTarget(); - MMI = &getAnalysis<MachineModuleInfo>(); - TII = TM->getSubtargetImpl()->getInstrInfo(); - - // Find the size of the stack frame. - FI->setFrameSize(MF.getFrameInfo()->getStackSize()); - - // Find all safe points. - if (FI->getStrategy().customSafePoints()) { - FI->getStrategy().findCustomSafePoints(*FI, MF); - } else { - FindSafePoints(MF); - } - - // Find the stack offsets for all roots. - FindStackOffsets(MF); - - return false; -} +GCStrategy::GCStrategy() + : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false), + CustomWriteBarriers(false), CustomRoots(false), InitRoots(true), + UsesMetadata(false) {} diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 457d7d6..4188e5d 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -54,6 +54,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -64,7 +65,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -90,10 +90,16 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"), cl::init(false)); -STATISTIC(NumMerged , "Number of globals merged"); +STATISTIC(NumMerged, "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { const TargetMachine *TM; + const DataLayout *DL; + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions), see the code that passes in the offset in the ARM backend + // for more information. 
+ unsigned MaxOffset; bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; @@ -117,8 +123,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - explicit GlobalMerge(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + explicit GlobalMerge(const TargetMachine *TM = nullptr, + unsigned MaximalOffset = 0) + : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), + MaxOffset(MaximalOffset) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -138,22 +146,16 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", + false, false) +INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", + false, false) bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - const DataLayout *DL = TLI->getDataLayout(); - - // FIXME: Infer the maximum possible offset depending on the actual users - // (these max offsets are different for the users inside Thumb or ARM - // functions) - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - // FIXME: Find better heuristics std::stable_sort(Globals.begin(), Globals.end(), - [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + [this](const GlobalVariable *GV1, const GlobalVariable *GV2) { Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); @@ -282,9 +284,6 @@ bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, BSSGlobals; - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - const DataLayout *DL = TLI->getDataLayout(); - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; setMustKeepGlobalVariables(M); @@ -357,6 +356,6 @@ bool GlobalMerge::doFinalization(Module &M) { return false; } -Pass *llvm::createGlobalMergePass(const TargetMachine *TM) { - return new GlobalMerge(TM); +Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) { + return new GlobalMerge(TM, Offset); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index e84d25d..7a29569 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -271,15 +271,13 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - TLI = MF.getSubtarget().getTargetLowering(); - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); + TLI = ST.getTargetLowering(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = &MF.getRegInfo(); - - const TargetSubtargetInfo &ST = - MF.getTarget().getSubtarget<TargetSubtargetInfo>(); SchedModel.init(ST.getSchedModel(), &ST, TII); if (!TII) return false; @@ -290,7 +288,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (!PreRegAlloc) { // Tail merging tends to expose more if-conversion opportunities.
BranchFolder BF(true, false, *MBFI, *MBPI); - BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), + BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 6a6e15d..f0d407f 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -508,6 +508,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; WorkList.push_back(std::make_pair(UseReg, UseVNI)); + LiveInterval &OrigLI = LIS.getInterval(Original); do { unsigned Reg; VNInfo *VNI; @@ -521,8 +522,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Trace through PHI-defs created by live range splitting. if (VNI->isPHIDef()) { - // Stop at original PHIs. We don't know the value at the predecessors. - if (VNI->def == OrigVNI->def) { + // Stop at original PHIs. We don't know the value at the + // predecessors. Look up the VNInfo for the current definition + // in OrigLI, to properly determine whether or not this phi was + // added by splitting. + if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) { DEBUG(dbgs() << "orig phi value\n"); SVI->second.DefByOrigPHI = true; SVI->second.AllDefsAreReloads = false; @@ -542,7 +546,6 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Separate all values dominated by OrigVNI into PHIs and non-PHIs. SmallVector<VNInfo*, 8> PHIs, NonPHIs; LiveInterval &LI = LIS.getInterval(Reg); - LiveInterval &OrigLI = LIS.getInterval(Original); for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); VI != VE; ++VI) { @@ -573,8 +576,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); // Add all the PHIs as dependents of NonPHI. - for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi) - SVI->second.Deps.push_back(PHIs[pi]); + SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(), + PHIs.end()); // This is the first time we see NonPHI, add it to the worklist. if (Inserted) WorkList.push_back(std::make_pair(Reg, NonPHI)); @@ -1088,7 +1091,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; - bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT || + MI->getOpcode() == TargetOpcode::PATCHPOINT || MI->getOpcode() == TargetOpcode::STACKMAP); // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied @@ -1138,13 +1142,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, continue; // FoldMI does not define this physreg. Remove the LI segment. 
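// The MCRegUnitIterator walk deleted just below is what the new
// LiveIntervals::removePhysRegDefAt(Reg, Idx) call stands in for: for each
// register unit of Reg it looks up the value live at Idx and removes it, so
// the caller no longer open-codes the loop.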
assert(MO->isDead() && "Cannot fold physreg def"); - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { - if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - if (VNInfo *VNI = LR->getVNInfoAt(Idx)) - LR->removeValNo(VNI); - } - } + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + LIS.removePhysRegDefAt(Reg, Idx); } LIS.ReplaceMachineInstrInMaps(MI, FoldMI); diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp index 20f775c..75fa261 100644 --- a/lib/CodeGen/JumpInstrTables.cpp +++ b/lib/CodeGen/JumpInstrTables.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "jt" #include "llvm/CodeGen/JumpInstrTables.h" - #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/CodeGen/Passes.h" @@ -30,7 +29,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include <vector> using namespace llvm; @@ -117,8 +115,8 @@ bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { if (!isa<GlobalAlias>(C)) C->replaceUsesOfWithOnConstant(GV, V, U); } else { - assert(false && "The Use of a Function symbol is neither an instruction nor" - " a constant"); + llvm_unreachable("The Use of a Function symbol is neither an instruction " + "nor a constant"); } return true; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 61face2..9c23368 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,23 +12,23 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" - #include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/ForwardControlFlowIntegrity.h" #include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -78,8 +78,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); } -void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(this)); +TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(BasicTTIImpl(this, F)); + }); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -90,7 +92,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, AnalysisID StopAfter) { // Add internal analysis passes from the target machine. - TM->addAnalysisPasses(PM); + PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); // Targets may override createPassConfig to provide a target-specific // subclass. @@ -114,7 +116,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // all the per-module stuff we're generating, including MCContext. 
MachineModuleInfo *MMI = new MachineModuleInfo( *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), - &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering()); + TM->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. @@ -222,13 +224,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, } // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); if (!Printer) return true; - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.release(); - PM.add(Printer); return false; @@ -262,20 +262,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, if (!MCE || !MAB) return true; - std::unique_ptr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget() - .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB, - Out, MCE, STI, - Options.MCOptions.MCRelaxAll)); + std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( + getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, + Options.MCOptions.MCRelaxAll)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); if (!Printer) return true; - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.release(); - PM.add(Printer); return false; // success! diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index b621e3b..9eaf7da 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -104,14 +104,6 @@ void LexicalScopes::extractLexicalScopes( } } -LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); - auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); - return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; -} - /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { @@ -168,11 +160,10 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { LexicalScope *Parent = nullptr; if (D.isLexicalBlock()) Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); - // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 - // compatibility is no longer required. - I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope), - std::make_tuple(Parent, DIDescriptor(Scope), - nullptr, false)).first; + I = LexicalScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, DIDescriptor(Scope), + nullptr, false)).first; if (!Parent) { assert(DIDescriptor(Scope).isSubprogram()); @@ -199,12 +190,11 @@ LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, else Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); - // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 - // compatibility is no longer required. 
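// Background for the emplace calls here: std::make_tuple copies/decays its
// arguments into a temporary tuple, while std::forward_as_tuple builds a
// tuple of references and forwards them straight to the constructors of the
// map entry. Per the deleted FIXME, make_tuple was only used for MSVC2012
// compatibility. A generic illustration (Widget is hypothetical):
//
//   std::map<int, Widget> M;
//   M.emplace(std::piecewise_construct,
//             std::forward_as_tuple(42),               // key built in place
//             std::forward_as_tuple(Parent, Scope));   // Widget(Parent, Scope)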
I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, - std::make_tuple(P), - std::make_tuple(Parent, Scope, InlinedAt, - false)).first; + std::forward_as_tuple(P), + std::forward_as_tuple(Parent, Scope, + InlinedAt, false)) + .first; return &I->second; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 1624851..dc936a3 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -40,7 +40,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" - #include <memory> using namespace llvm; diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 7e3b361..9748329 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -22,8 +22,8 @@ #define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/DebugInfo.h" namespace llvm { diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index ddb0032..fd7516d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -26,11 +26,278 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> using namespace llvm; +//===----------------------------------------------------------------------===// +// Implementation of various methods necessary for calculation of live ranges. +// The implementation of the methods abstracts from the concrete type of the +// segment collection. +// +// Implementation of the class follows the Template design pattern. The base +// class contains generic algorithms that call collection-specific methods, +// which are provided in concrete subclasses. In order to avoid virtual calls +// these methods are provided by means of C++ template instantiation. +// The base class calls the methods of the subclass through method impl(), +// which casts 'this' pointer to the type of the subclass. +// +//===----------------------------------------------------------------------===// + +template <typename ImplT, typename IteratorT, typename CollectionT> +class CalcLiveRangeUtilBase { +protected: + LiveRange *LR; + +protected: + CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {} + +public: + typedef LiveRange::Segment Segment; + typedef IteratorT iterator; + + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + + iterator I = impl().find(Def); + if (I == segments().end()) { + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + Segment *S = segmentAt(I); + if (SlotIndex::isSameInstr(Def, S->start)) { + assert(S->valno->def == S->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. 
+ Def = std::min(Def, S->start); + if (Def != S->start) + S->start = S->valno->def = Def; + return S->valno; + } + assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def"); + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) { + if (segments().empty()) + return nullptr; + iterator I = + impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); + if (I == segments().begin()) + return nullptr; + --I; + if (I->end <= StartIdx) + return nullptr; + if (I->end < Use) + extendSegmentEndTo(I, Use); + return I->valno; + } + + /// This method is used when we want to extend the segment specified + /// by I to end at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap + /// with. The iterator is not invalidated. + void extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = std::next(I); + for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo) + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + + // If NewEnd was in the middle of a segment, make sure to get its endpoint. + S->end = std::max(NewEnd, std::prev(MergeTo)->end); + + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != segments().end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + S->end = MergeTo->end; + ++MergeTo; + } + + // Erase any dead segments. + segments().erase(std::next(I), MergeTo); + } + + /// This method is used when we want to extend the segment specified + /// by I to start at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap with. + iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = I; + do { + if (MergeTo == segments().begin()) { + S->start = NewStart; + segments().erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another segment, just delete a range and + // extend that segment. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + segmentAt(MergeTo)->end = S->end; + } else { + // Otherwise, extend the segment right after. + ++MergeTo; + Segment *MergeToSeg = segmentAt(MergeTo); + MergeToSeg->start = NewStart; + MergeToSeg->end = S->end; + } + + segments().erase(std::next(MergeTo), std::next(I)); + return MergeTo; + } + + iterator addSegment(Segment S) { + SlotIndex Start = S.start, End = S.end; + iterator I = impl().findInsertPos(S); + + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. 
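// A worked example of the two merge rules in this function, with all segments
// carrying the same valno: adding [8,20) to a range holding [4,12) and
// [20,32) first extends [4,12) up to [4,20) via extendSegmentEndTo; the
// result then touches [20,32), so everything coalesces into a single [4,32)
// segment.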
+ if (I != segments().begin()) { + iterator B = std::prev(I); + if (S.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendSegmentEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two segments with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this segment ends in the middle of, or right next + // to, another segment, merge it into that segment. + if (I != segments().end()) { + if (S.valno == I->valno) { + if (I->start <= End) { + I = extendSegmentStartTo(I, Start); + + // If S is a complete superset of a segment, we may need to grow its + // endpoint as well. + if (End > I->end) + extendSegmentEndTo(I, End); + return I; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(I->start >= End && + "Cannot overlap two segments with differing ValID's"); + } + } + + // Otherwise, this is just a new segment that doesn't interact with + // anything. + // Insert it. + return segments().insert(I, S); + } + +private: + ImplT &impl() { return *static_cast<ImplT *>(this); } + + CollectionT &segments() { return impl().segmentsColl(); } + + Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment vector. +//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilVector; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator, + LiveRange::Segments> CalcLiveRangeUtilVectorBase; + +class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase { +public: + CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {} + +private: + friend CalcLiveRangeUtilVectorBase; + + LiveRange::Segments &segmentsColl() { return LR->segments; } + + void insertAtEnd(const Segment &S) { LR->segments.push_back(S); } + + iterator find(SlotIndex Pos) { return LR->find(Pos); } + + iterator findInsertPos(Segment S) { + return std::upper_bound(LR->begin(), LR->end(), S.start); + } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment set. 
+//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilSet; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, + LiveRange::SegmentSet::iterator, + LiveRange::SegmentSet> CalcLiveRangeUtilSetBase; + +class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase { +public: + CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {} + +private: + friend CalcLiveRangeUtilSetBase; + + LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; } + + void insertAtEnd(const Segment &S) { + LR->segmentSet->insert(LR->segmentSet->end(), S); + } + + iterator find(SlotIndex Pos) { + iterator I = + LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr)); + if (I == LR->segmentSet->begin()) + return I; + iterator PrevI = std::prev(I); + if (Pos < (*PrevI).end) + return PrevI; + return I; + } + + iterator findInsertPos(Segment S) { + iterator I = LR->segmentSet->upper_bound(S); + if (I != LR->segmentSet->end() && !(S.start < *I)) + ++I; + return I; + } +}; + +//===----------------------------------------------------------------------===// +// LiveRange methods +//===----------------------------------------------------------------------===// + LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound. // Unfortunately, std::upper_bound cannot be used with mixed types until we @@ -51,30 +318,11 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) { VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { - assert(!Def.isDead() && "Cannot define a value at the dead slot"); - iterator I = find(Def); - if (I == end()) { - VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); - return VNI; - } - if (SlotIndex::isSameInstr(Def, I->start)) { - assert(I->valno->def == I->start && "Inconsistent existing value def"); - - // It is possible to have both normal and early-clobber defs of the same - // register on an instruction. It doesn't make a lot of sense, but it is - // possible to specify in inline assembly. - // - // Just convert everything to early-clobber. - Def = std::min(Def, I->start); - if (Def != I->start) - I->start = I->valno->def = Def; - return I->valno; - } - assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); - VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); - return VNI; + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator); } // overlaps - Return true if the intersection of the two live ranges is @@ -185,6 +433,27 @@ bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { return I != begin() && (--I)->end > Start; } +bool LiveRange::covers(const LiveRange &Other) const { + if (empty()) + return Other.empty(); + + const_iterator I = begin(); + for (const Segment &O : Other.segments) { + I = advanceTo(I, O.start); + if (I == end() || I->start > O.start) + return false; + + // Check adjacent live segments and see if we can get behind O.end. + while (I->end < O.end) { + const_iterator Last = I; + // Get next segment and abort if it was not adjacent. + ++I; + if (I == end() || Last->end != I->start) + return false; + } + } + return true; +} /// ValNo is dead, remove it. 
If it is the largest value number, just nuke it /// (and any other deleted values neighboring it), otherwise mark it as ~1U so @@ -204,8 +473,8 @@ void LiveRange::markValNoForDeletion(VNInfo *ValNo) { void LiveRange::RenumberValues() { SmallPtrSet<VNInfo*, 8> Seen; valnos.clear(); - for (const_iterator I = begin(), E = end(); I != E; ++I) { - VNInfo *VNI = I->valno; + for (const Segment &S : segments) { + VNInfo *VNI = S.valno; if (!Seen.insert(VNI).second) continue; assert(!VNI->isUnused() && "Unused valno used by live segment"); @@ -214,133 +483,35 @@ void LiveRange::RenumberValues() { } } -/// This method is used when we want to extend the segment specified by I to end -/// at the specified endpoint. To do this, we should merge and eliminate all -/// segments that this will overlap with. The iterator is not invalidated. -void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { - assert(I != end() && "Not a valid segment!"); - VNInfo *ValNo = I->valno; - - // Search for the first segment that we can't merge with. - iterator MergeTo = std::next(I); - for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { - assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); - } - - // If NewEnd was in the middle of a segment, make sure to get its endpoint. - I->end = std::max(NewEnd, std::prev(MergeTo)->end); - - // If the newly formed segment now touches the segment after it and if they - // have the same value number, merge the two segments into one segment. - if (MergeTo != end() && MergeTo->start <= I->end && - MergeTo->valno == ValNo) { - I->end = MergeTo->end; - ++MergeTo; - } - - // Erase any dead segments. - segments.erase(std::next(I), MergeTo); +void LiveRange::addSegmentToSet(Segment S) { + CalcLiveRangeUtilSet(this).addSegment(S); } - -/// This method is used when we want to extend the segment specified by I to -/// start at the specified endpoint. To do this, we should merge and eliminate -/// all segments that this will overlap with. -LiveRange::iterator -LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { - assert(I != end() && "Not a valid segment!"); - VNInfo *ValNo = I->valno; - - // Search for the first segment that we can't merge with. - iterator MergeTo = I; - do { - if (MergeTo == begin()) { - I->start = NewStart; - segments.erase(MergeTo, I); - return I; - } - assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); - --MergeTo; - } while (NewStart <= MergeTo->start); - - // If we start in the middle of another segment, just delete a range and - // extend that segment. - if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { - MergeTo->end = I->end; - } else { - // Otherwise, extend the segment right after. - ++MergeTo; - MergeTo->start = NewStart; - MergeTo->end = I->end; +LiveRange::iterator LiveRange::addSegment(Segment S) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) { + addSegmentToSet(S); + return end(); } - - segments.erase(std::next(MergeTo), std::next(I)); - return MergeTo; + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).addSegment(S); } -LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { - SlotIndex Start = S.start, End = S.end; - iterator it = std::upper_bound(From, end(), Start); - - // If the inserted segment starts in the middle or right at the end of - // another segment, just extend that segment to contain the segment of S. 
- if (it != begin()) { - iterator B = std::prev(it); - if (S.valno == B->valno) { - if (B->start <= Start && B->end >= Start) { - extendSegmentEndTo(B, End); - return B; - } - } else { - // Check to make sure that we are not overlapping two live segments with - // different valno's. - assert(B->end <= Start && - "Cannot overlap two segments with differing ValID's" - " (did you def the same reg twice in a MachineInstr?)"); - } - } - - // Otherwise, if this segment ends in the middle of, or right next to, another - // segment, merge it into that segment. - if (it != end()) { - if (S.valno == it->valno) { - if (it->start <= End) { - it = extendSegmentStartTo(it, Start); - - // If S is a complete superset of a segment, we may need to grow its - // endpoint as well. - if (End > it->end) - extendSegmentEndTo(it, End); - return it; - } - } else { - // Check to make sure that we are not overlapping two live segments with - // different valno's. - assert(it->start >= End && - "Cannot overlap two segments with differing ValID's"); - } - } - - // Otherwise, this is just a new segment that doesn't interact with anything. - // Insert it. - return segments.insert(it, S); +void LiveRange::append(const Segment S) { + // Check that the segment belongs to the back of the list. + assert(segments.empty() || segments.back().end <= S.start); + segments.push_back(S); } /// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { - if (empty()) - return nullptr; - iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); - if (I == begin()) - return nullptr; - --I; - if (I->end <= StartIdx) - return nullptr; - if (I->end < Kill) - extendSegmentEndTo(I, Kill); - return I->valno; + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill); } /// Remove the specified segment from this range. Note that the segment must @@ -461,8 +632,8 @@ void LiveRange::join(LiveRange &Other, // This can leave Other in an invalid state because we're not coalescing // touching segments that now have identical values. That's OK since Other is // not supposed to be valid after calling join(); - for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) - I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; + for (Segment &S : Other.segments) + S.valno = NewVNInfo[RHSValNoAssignments[S.valno->id]]; // Update val# info. Renumber them and make sure they all belong to this // LiveRange now. Also remove dead val#'s. @@ -482,8 +653,8 @@ void LiveRange::join(LiveRange &Other, // Okay, now insert the RHS live segments into the LHS. 
   LiveRangeUpdater Updater(this);
-  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
-    Updater.add(*I);
+  for (Segment &S : Other.segments)
+    Updater.add(S);
 }
 
 /// Merge all of the segments in RHS into this live range as the specified
@@ -493,8 +664,8 @@ void LiveRange::join(LiveRange &Other,
 void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS,
                                        VNInfo *LHSValNo) {
   LiveRangeUpdater Updater(this);
-  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
-    Updater.add(I->start, I->end, LHSValNo);
+  for (const Segment &S : RHS.segments)
+    Updater.add(S.start, S.end, LHSValNo);
 }
 
 /// MergeValueInAsValue - Merge all of the live segments of a specific val#
@@ -506,9 +677,9 @@ void LiveRange::MergeValueInAsValue(const LiveRange &RHS,
                                     const VNInfo *RHSValNo,
                                     VNInfo *LHSValNo) {
   LiveRangeUpdater Updater(this);
-  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
-    if (I->valno == RHSValNo)
-      Updater.add(I->start, I->end, LHSValNo);
+  for (const Segment &S : RHS.segments)
+    if (S.valno == RHSValNo)
+      Updater.add(S.start, S.end, LHSValNo);
 }
 
 /// MergeValueNumberInto - This method is called when two value numbers
@@ -570,10 +741,258 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
   return V2;
 }
 
+void LiveRange::flushSegmentSet() {
+  assert(segmentSet != nullptr && "segment set must have been created");
+  assert(
+      segments.empty() &&
+      "segment set can be used only initially before switching to the array");
+  segments.append(segmentSet->begin(), segmentSet->end());
+  delete segmentSet;
+  segmentSet = nullptr;
+  verify();
+}
+
+void LiveInterval::freeSubRange(SubRange *S) {
+  S->~SubRange();
+  // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
+void LiveInterval::removeEmptySubRanges() {
+  SubRange **NextPtr = &SubRanges;
+  SubRange *I = *NextPtr;
+  while (I != nullptr) {
+    if (!I->empty()) {
+      NextPtr = &I->Next;
+      I = *NextPtr;
+      continue;
+    }
+    // Skip empty subranges until we find the first nonempty one.
+    do {
+      SubRange *Next = I->Next;
+      freeSubRange(I);
+      I = Next;
+    } while (I != nullptr && I->empty());
+    *NextPtr = I;
+  }
+}
+
+void LiveInterval::clearSubRanges() {
+  for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+    Next = I->Next;
+    freeSubRange(I);
+  }
+  SubRanges = nullptr;
+}
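// removeEmptySubRanges() above walks SubRanges as an intrusive singly linked
// list with a pointer-to-pointer, so unlinking a node needs no special case
// for the list head. A minimal standalone sketch of the same idiom, with a
// hypothetical Node type and plain delete instead of the BumpPtr allocation
// the real code relies on:

struct Node {
  bool Empty;
  Node *Next;
};

// Unlink and delete every node whose Empty flag is set. NextPtr always points
// at the link field that leads to the current node, so splicing a node out is
// a single store, head included.
void removeEmptyNodes(Node *&Head) {
  Node **NextPtr = &Head;
  while (Node *I = *NextPtr) {
    if (!I->Empty) {
      NextPtr = &I->Next;   // Keep I; advance the link to patch.
    } else {
      *NextPtr = I->Next;   // Splice I out of the list.
      delete I;
    }
  }
}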
+/// Helper function for constructMainRangeFromSubranges(): Search the CFG
+/// backwards until we find a place covered by a LiveRange segment that
+/// actually has a valno set.
+static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
+                            const MachineBasicBlock *MBB,
+                            SmallPtrSetImpl<const MachineBasicBlock*> &Visited) {
+  // We start the search at the end of MBB.
+  SlotIndex EndIdx = Indexes.getMBBEndIdx(MBB);
+  // In our use case we can't leave the area covered by the live segments
+  // without finding an actual VNI def.
+  LiveRange::iterator I = LR.find(EndIdx.getPrevSlot());
+  assert(I != LR.end());
+  LiveRange::Segment &S = *I;
+  if (S.valno != nullptr)
+    return S.valno;
+
+  VNInfo *VNI = nullptr;
+  // Continue at predecessors (we could even go to idom with domtree available).
+  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+    // Avoid going in circles.
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    VNI = searchForVNI(Indexes, LR, Pred, Visited);
+    if (VNI != nullptr) {
+      S.valno = VNI;
+      break;
+    }
+  }
+
+  return VNI;
+}
+
+static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
+  SmallPtrSet<const MachineBasicBlock*, 5> Visited;
+  for (LiveRange::Segment &S : LI.segments) {
+    if (S.valno != nullptr)
+      continue;
+    // This can only happen at the beginning of a basic block.
+    assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+    Visited.clear();
+    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+      VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+      if (VNI != nullptr) {
+        S.valno = VNI;
+        break;
+      }
+    }
+    assert(S.valno != nullptr && "could not determine valno");
+  }
+}
+
+void LiveInterval::constructMainRangeFromSubranges(
+    const SlotIndexes &Indexes, VNInfo::Allocator &VNIAllocator) {
+  // The basic observations on which this algorithm is based:
+  // - Each Def/ValNo in a subrange must have a corresponding def on the main
+  //   range, but no further defs/valnos are necessary.
+  // - If any of the subranges is live at a point, the main live range has to
+  //   be live too; conversely, if no subrange is live, the main range must
+  //   not be live either.
+  // We do this by scanning through all the subranges simultaneously, creating
+  // new segments in the main range as segment starts/ends come up in the
+  // subranges.
+  assert(hasSubRanges() && "expected subranges to be present");
+  assert(segments.empty() && valnos.empty() && "expected empty main range");
+
+  // Collect subrange, iterator pairs for the walk and determine first and last
+  // SlotIndex involved.
+  SmallVector<std::pair<const SubRange*, const_iterator>, 4> SRs;
+  SlotIndex First;
+  SlotIndex Last;
+  for (const SubRange &SR : subranges()) {
+    if (SR.empty())
+      continue;
+    SRs.push_back(std::make_pair(&SR, SR.begin()));
+    if (!First.isValid() || SR.segments.front().start < First)
+      First = SR.segments.front().start;
+    if (!Last.isValid() || SR.segments.back().end > Last)
+      Last = SR.segments.back().end;
+  }
+
+  // Walk over all subranges simultaneously.
+  Segment CurrentSegment;
+  bool ConstructingSegment = false;
+  bool NeedVNIFixup = false;
+  unsigned ActiveMask = 0;
+  SlotIndex Pos = First;
+  while (true) {
+    SlotIndex NextPos = Last;
+    enum {
+      NOTHING,
+      BEGIN_SEGMENT,
+      END_SEGMENT,
+    } Event = NOTHING;
+    // Which subregister lanes are affected by the current event.
+    unsigned EventMask = 0;
+    // Whether a BEGIN_SEGMENT is also a valno definition point.
+    bool IsDef = false;
+    // Find the next begin or end of a subrange segment. Combine masks if we
+    // have multiple begins/ends at the same position. Ends take precedence
+    // over begins.
+    for (auto &SRP : SRs) {
+      const SubRange &SR = *SRP.first;
+      const_iterator &I = SRP.second;
+      // Advance iterator of subrange to a segment involving Pos; the earlier
+      // segments are already merged at this point.
+      while (I != SR.end() &&
+             (I->end < Pos ||
+              (I->end == Pos && (ActiveMask & SR.LaneMask) == 0)))
+        ++I;
+      if (I == SR.end())
+        continue;
+      if ((ActiveMask & SR.LaneMask) == 0 &&
+          Pos <= I->start && I->start <= NextPos) {
+        // Merge multiple begins at the same position.
+        if (I->start == NextPos && Event == BEGIN_SEGMENT) {
+          EventMask |= SR.LaneMask;
+          IsDef |= I->valno->def == I->start;
+        } else if (I->start < NextPos || Event != END_SEGMENT) {
+          Event = BEGIN_SEGMENT;
+          NextPos = I->start;
+          EventMask = SR.LaneMask;
+          IsDef = I->valno->def == I->start;
+        }
+      }
+      if ((ActiveMask & SR.LaneMask) != 0 &&
+          Pos <= I->end && I->end <= NextPos) {
+        // Merge multiple ends at the same position.
+        if (I->end == NextPos && Event == END_SEGMENT)
+          EventMask |= SR.LaneMask;
+        else {
+          Event = END_SEGMENT;
+          NextPos = I->end;
+          EventMask = SR.LaneMask;
+        }
+      }
+    }
+
+    // Advance scan position.
+    Pos = NextPos;
+    if (Event == BEGIN_SEGMENT) {
+      if (ConstructingSegment && IsDef) {
+        // Finish previous segment because we have to start a new one.
+        CurrentSegment.end = Pos;
+        append(CurrentSegment);
+        ConstructingSegment = false;
+      }
+
+      // Start a new segment if necessary.
+      if (!ConstructingSegment) {
+        // Determine value number for the segment.
+        VNInfo *VNI = nullptr;
+        if (IsDef) {
+          VNI = getNextValue(Pos, VNIAllocator);
+        } else {
+          // We have to reuse an existing value number. If we are lucky, we
+          // have already passed one of the predecessor blocks and determined
+          // its value number (with blocks in reverse postorder this would
+          // always be true, but we have no such guarantee).
+          assert(Pos.isBlock());
+          const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Pos);
+          // See if any of the predecessor blocks already has a known VNI.
+          for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+            SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
+            VNI = getVNInfoBefore(PredEnd);
+            if (VNI != nullptr)
+              break;
+          }
+          // The def will come later: we have to do an extra fixup pass.
+          if (VNI == nullptr)
+            NeedVNIFixup = true;
+        }
+
+        CurrentSegment.start = Pos;
+        CurrentSegment.valno = VNI;
+        ConstructingSegment = true;
+      }
+      ActiveMask |= EventMask;
+    } else if (Event == END_SEGMENT) {
+      assert(ConstructingSegment);
+      // Finish segment if no lane is active anymore.
+      ActiveMask &= ~EventMask;
+      if (ActiveMask == 0) {
+        CurrentSegment.end = Pos;
+        append(CurrentSegment);
+        ConstructingSegment = false;
+      }
+    } else {
+      // We reached the end of the last subrange and can stop.
+      assert(Event == NOTHING);
+      break;
+    }
+  }
+
+  // We might not be able to assign new valnos for all segments if the basic
+  // block containing the definition comes after a segment using the valno.
+  // Do a fixup pass for this uncommon case.
+  if (NeedVNIFixup)
+    determineMissingVNIs(Indexes, *this);
+
+  assert(ActiveMask == 0 && !ConstructingSegment && "all segments ended");
+  verify();
+}
+
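// constructMainRangeFromSubranges() above is an event sweep: it walks all
// subrange segment lists at once and keeps a main-range segment open while
// any lane is live. The same idea in self-contained form, with integer
// endpoints and without lane masks or value numbers -- a simplified model of
// the technique, not the LLVM API:

#include <algorithm>
#include <utility>
#include <vector>

using Seg = std::pair<int, int>; // Half-open [start, end).

// Union of several sorted, non-overlapping segment lists via one sweep:
// +1 at each start, -1 at each end; a result segment is open exactly while
// the active count is nonzero.
std::vector<Seg> unionOfLists(const std::vector<std::vector<Seg>> &Lists) {
  std::vector<std::pair<int, int>> Events; // (position, +1 begin / -1 end)
  for (const auto &L : Lists)
    for (const Seg &S : L) {
      Events.push_back({S.first, +1});
      Events.push_back({S.second, -1});
    }
  // Sorting pairs puts -1 before +1 at equal positions, so a segment that
  // ends exactly where another begins closes first and is re-merged below.
  std::sort(Events.begin(), Events.end());

  std::vector<Seg> Out;
  int Active = 0, Start = 0;
  for (auto [Pos, Delta] : Events) {
    if (Active == 0 && Delta > 0)
      Start = Pos;                       // Coverage begins.
    Active += Delta;
    if (Active == 0) {                   // Coverage ends at Pos.
      if (!Out.empty() && Out.back().second == Start)
        Out.back().second = Pos;         // Coalesce touching segments.
      else
        Out.push_back({Start, Pos});
    }
  }
  return Out;
}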
 unsigned LiveInterval::getSize() const {
   unsigned Sum = 0;
-  for (const_iterator I = begin(), E = end(); I != E; ++I)
-    Sum += I->start.distance(I->end);
+  for (const Segment &S : segments)
+    Sum += S.start.distance(S.end);
   return Sum;
 }
 
@@ -591,9 +1010,9 @@ void LiveRange::print(raw_ostream &OS) const {
   if (empty())
     OS << "EMPTY";
   else {
-    for (const_iterator I = begin(), E = end(); I != E; ++I) {
-      OS << *I;
-      assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+    for (const Segment &S : segments) {
+      OS << S;
+      assert(S.valno == getValNumInfo(S.valno->id) && "Bad VNInfo");
     }
   }
 
@@ -620,6 +1039,10 @@ void LiveRange::print(raw_ostream &OS) const {
 void LiveInterval::print(raw_ostream &OS) const {
   OS << PrintReg(reg) << ' ';
   super::print(OS);
+  // Print subranges.
+  for (const SubRange &SR : subranges()) {
+    OS << format(" L%04X ", SR.LaneMask) << SR;
+  }
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -648,6 +1071,26 @@ void LiveRange::verify() const {
     }
   }
 }
+
+void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
+  super::verify();
+
+  // Make sure SubRanges are fine and LaneMasks are disjoint.
+  unsigned Mask = 0;
+  unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+  for (const SubRange &SR : subranges()) {
+    // The subrange lane mask should be disjoint from all previous subrange
+    // masks.
+    assert((Mask & SR.LaneMask) == 0);
+    Mask |= SR.LaneMask;
+
+    // The subrange mask should be contained in the maximum lane mask for the
+    // vreg.
+    assert((Mask & ~MaxMask) == 0);
+
+    SR.verify();
+    // The main live range should cover the subrange.
+    assert(covers(SR));
+  }
+}
 #endif
 
 
@@ -692,14 +1135,14 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
   OS << " updater with gap = " << (ReadI - WriteI)
      << ", last start = " << LastStart
      << ":\n  Area 1:";
-  for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I)
-    OS << ' ' << *I;
+  for (const auto &S : make_range(LR->begin(), WriteI))
+    OS << ' ' << S;
   OS << "\n  Spills:";
   for (unsigned I = 0, E = Spills.size(); I != E; ++I)
     OS << ' ' << Spills[I];
   OS << "\n  Area 2:";
-  for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I)
-    OS << ' ' << *I;
+  for (const auto &S : make_range(ReadI, LR->end()))
    OS << ' ' << S;
   OS << '\n';
 }
 
@@ -723,6 +1166,13 @@ static inline bool coalescable(const LiveRange::Segment &A,
 
 void LiveRangeUpdater::add(LiveRange::Segment Seg) {
   assert(LR && "Cannot add to a null destination");
 
+  // Fall back to the regular add method if the live range
+  // is using the segment set instead of the segment vector.
+  if (LR->segmentSet != nullptr) {
+    LR->addSegmentToSet(Seg);
+    return;
+  }
+
   // Flush the state if Start moves backwards.
   if (!LastStart.isValid() || LastStart > Seg.start) {
     if (isDirty())
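// Several places in this patch (createDeadDef, flushSegmentSet, and the
// LiveRangeUpdater::add fallback above) special-case a LiveRange that is
// still backed by a std::set of segments: out-of-order inserts cost O(log n)
// in a set but O(n) in a sorted vector, and the set is converted to a vector
// once construction finishes. A standalone model of that build-then-flush
// pattern, with a hypothetical Segment type; it assumes non-overlapping
// segments, where the real code also merges:

#include <set>
#include <vector>

struct Segment {
  unsigned Start, End; // Half-open [Start, End).
  bool operator<(const Segment &O) const { return Start < O.Start; }
};

// Collect segments in any order at O(log n) per insert, then convert once to
// the compact sorted vector the rest of the code iterates over.
class SegmentSetBuilder {
  std::set<Segment> Set;

public:
  // Assumes the inserted segments do not overlap (the real code merges).
  void add(unsigned Start, unsigned End) { Set.insert({Start, End}); }

  // One-shot conversion; afterwards the builder is empty again.
  std::vector<Segment> flush() {
    std::vector<Segment> Out(Set.begin(), Set.end());
    Set.clear();
    return Out;
  }
};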
@@ -860,9 +1310,7 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
   const VNInfo *used = nullptr, *unused = nullptr;
 
   // Determine connections.
-  for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
-       I != E; ++I) {
-    const VNInfo *VNI = *I;
+  for (const VNInfo *VNI : LI->valnos) {
     // Group all unused values into one class.
     if (VNI->isUnused()) {
       if (unused)
@@ -938,6 +1386,8 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
       } else
         *J++ = *I;
   }
+  // TODO: Do not cheat anymore by simply clearing all subranges; distribute
+  // them to the new intervals instead.
+  LI.clearSubRanges();
   LI.segments.erase(J, E);
 
   // Transfer VNInfos to their new owners and renumber them.
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 1742e63..cc08045 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -62,6 +63,17 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
 static bool EnablePrecomputePhysRegs = false;
 #endif // NDEBUG
 
+static cl::opt<bool> EnableSubRegLiveness(
+    "enable-subreg-liveness", cl::Hidden, cl::init(true),
+    cl::desc("Enable subregister liveness tracking."));
+
+namespace llvm {
+cl::opt<bool> UseSegmentSetForPhysRegs(
+    "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+    cl::desc(
+        "Use segment set for the computation of the live ranges of physregs."));
+}
+
 void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<AliasAnalysis>();
@@ -115,6 +127,10 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
   AA = &getAnalysis<AliasAnalysis>();
   Indexes = &getAnalysis<SlotIndexes>();
   DomTree = &getAnalysis<MachineDominatorTree>();
+
+  if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness())
+    MRI->enableSubRegLiveness(true);
+
   if (!LRCalc)
     LRCalc = new LiveRangeCalc();
@@ -183,9 +199,8 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
   assert(LRCalc && "LRCalc not initialized.");
   assert(LI.empty() && "Should only compute empty intervals.");
   LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
-  LRCalc->createDeadDefs(LI);
-  LRCalc->extendToUses(LI);
-  computeDeadValues(&LI, LI, nullptr, nullptr);
+  LRCalc->calculate(LI);
+  computeDeadValues(LI, nullptr);
 }
 
 void LiveIntervals::computeVirtRegs() {
@@ -260,6 +275,10 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
       LRCalc->extendToUses(LR, Reg);
     }
   }
+
+  // Flush the segment set to the segment vector.
+  if (UseSegmentSetForPhysRegs)
+    LR.flushSegmentSet();
 }
 
 
@@ -292,7 +311,8 @@ void LiveIntervals::computeLiveInRegUnits() {
       unsigned Unit = *Units;
       LiveRange *LR = RegUnitRanges[Unit];
       if (!LR) {
-        LR = RegUnitRanges[Unit] = new LiveRange();
+        // Use segment set to speed up initial computation of the live range.
+        LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
         NewRanges.push_back(Unit);
       }
       VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
@@ -312,6 +332,70 @@
 }
 
 
+static void createSegmentsForValues(LiveRange &LR,
+    iterator_range<LiveInterval::vni_iterator> VNIs) {
+  for (auto VNI : VNIs) {
+    if (VNI->isUnused())
+      continue;
+    SlotIndex Def = VNI->def;
+    LR.addSegment(LiveRange::Segment(Def, Def.getDeadSlot(), VNI));
+  }
+}
+
+typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList;
+
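// The shrink-to-uses recomputation below proceeds in two phases: seed a
// minimal dead-def segment per value (createSegmentsForValues above), then
// extend each segment to the remaining uses. A much-simplified single-block
// model with integer positions; the real code extends across basic blocks
// with the worklist type declared above:

#include <algorithm>
#include <utility>
#include <vector>

struct Value {
  int Def;               // Position of the defining instruction.
  std::vector<int> Uses; // Positions of the remaining uses.
};

// Seed a minimal [Def, Def + 1) segment per value, then extend it to the last
// surviving use. Values with no uses keep their minimal "dead def" segment,
// which the caller can then recognize and delete.
std::vector<std::pair<int, int>> shrinkToUses(const std::vector<Value> &Values) {
  std::vector<std::pair<int, int>> Segments;
  for (const Value &V : Values) {
    int End = V.Def + 1;
    for (int U : V.Uses)
      End = std::max(End, U);
    Segments.push_back({V.Def, End});
  }
  std::sort(Segments.begin(), Segments.end());
  return Segments;
}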
+static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
+                                 ShrinkToUsesWorkList &WorkList,
+                                 const LiveRange &OldRange) {
+  // Keep track of the PHIs that are in use.
+  SmallPtrSet<VNInfo*, 8> UsedPHIs;
+  // Blocks that have already been added to WorkList as live-out.
+  SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
+  // Extend intervals to reach all uses in WorkList.
+  while (!WorkList.empty()) {
+    SlotIndex Idx = WorkList.back().first;
+    VNInfo *VNI = WorkList.back().second;
+    WorkList.pop_back();
+    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot());
+    SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB);
+
+    // Extend the live range for VNI to be live at Idx.
+    if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) {
+      assert(ExtVNI == VNI && "Unexpected existing value number");
+      (void)ExtVNI;
+      // Is this a PHIDef we haven't seen before?
+      if (!VNI->isPHIDef() || VNI->def != BlockStart ||
+          !UsedPHIs.insert(VNI).second)
+        continue;
+      // The PHI is live, make sure the predecessors are live-out.
+      for (auto &Pred : MBB->predecessors()) {
+        if (!LiveOut.insert(Pred).second)
+          continue;
+        SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+        // A predecessor is not required to have a live-out value for a PHI.
+        if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop))
+          WorkList.push_back(std::make_pair(Stop, PVNI));
+      }
+      continue;
+    }
+
+    // VNI is live-in to MBB.
+    DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+    LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+
+    // Make sure VNI is live-out from the predecessors.
+    for (auto &Pred : MBB->predecessors()) {
+      if (!LiveOut.insert(Pred).second)
+        continue;
+      SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+      assert(OldRange.getVNInfoBefore(Stop) == VNI &&
+             "Wrong value out of predecessor");
+      WorkList.push_back(std::make_pair(Stop, VNI));
+    }
+  }
+}
+
 /// shrinkToUses - After removing some uses of a register, shrink its live
 /// range to just the remaining uses. This method does not compute reaching
 /// defs for new uses, and it doesn't remove dead defs.
@@ -320,11 +404,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
   DEBUG(dbgs() << "Shrink: " << *li << '\n');
   assert(TargetRegisterInfo::isVirtualRegister(li->reg)
          && "Can only shrink virtual registers");
-  // Find all the values used, including PHI kills.
-  SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
 
-  // Blocks that have already been added to WorkList as live-out.
-  SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+  // Shrink subregister live ranges.
+  for (LiveInterval::SubRange &S : li->subranges()) {
+    shrinkToUses(S, li->reg);
+  }
+
+  // Find all the values used, including PHI kills.
+  ShrinkToUsesWorkList WorkList;
 
   // Visit all instructions reading li->reg.
   for (MachineRegisterInfo::reg_instr_iterator
@@ -355,103 +442,126 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
 
   // Create new live ranges with only minimal live segments per def.
   LiveRange NewLR;
-  for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
-       I != E; ++I) {
-    VNInfo *VNI = *I;
-    if (VNI->isUnused())
-      continue;
-    NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI));
-  }
+  createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
+  extendSegmentsToUses(NewLR, *Indexes, WorkList, *li);
 
-  // Keep track of the PHIs that are in use.
-  SmallPtrSet<VNInfo*, 8> UsedPHIs;
+  // Move the trimmed segments back.
+  li->segments.swap(NewLR.segments);
 
-  // Extend intervals to reach all uses in WorkList.
-  while (!WorkList.empty()) {
-    SlotIndex Idx = WorkList.back().first;
-    VNInfo *VNI = WorkList.back().second;
-    WorkList.pop_back();
-    const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
-    SlotIndex BlockStart = getMBBStartIdx(MBB);
+  // Handle dead values.
+  bool CanSeparate = computeDeadValues(*li, dead);
+  DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+  return CanSeparate;
+}
 
-    // Extend the live range for VNI to be live at Idx.
-    if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) {
-      (void)ExtVNI;
-      assert(ExtVNI == VNI && "Unexpected existing value number");
-      // Is this a PHIDef we haven't seen before?
-      if (!VNI->isPHIDef() || VNI->def != BlockStart ||
-          !UsedPHIs.insert(VNI).second)
-        continue;
-      // The PHI is live, make sure the predecessors are live-out.
-      for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
-           PE = MBB->pred_end(); PI != PE; ++PI) {
-        if (!LiveOut.insert(*PI).second)
-          continue;
-        SlotIndex Stop = getMBBEndIdx(*PI);
-        // A predecessor is not required to have a live-out value for a PHI.
-        if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
-          WorkList.push_back(std::make_pair(Stop, PVNI));
+bool LiveIntervals::computeDeadValues(LiveInterval &LI,
+                                      SmallVectorImpl<MachineInstr*> *dead) {
+  bool PHIRemoved = false;
+  for (auto VNI : LI.valnos) {
+    if (VNI->isUnused())
+      continue;
+    SlotIndex Def = VNI->def;
+    LiveRange::iterator I = LI.FindSegmentContaining(Def);
+    assert(I != LI.end() && "Missing segment for VNI");
+
+    // Is the register live before the def? If not, we may have to add a
+    // read-undef flag for subregister defs.
+    if (MRI->tracksSubRegLiveness()) {
+      if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+        MachineInstr *MI = getInstructionFromIndex(Def);
+        MI->addRegisterDefReadUndef(LI.reg);
       }
+    }
+
+    if (I->end != Def.getDeadSlot())
       continue;
+    if (VNI->isPHIDef()) {
+      // This is a dead PHI. Remove it.
+      VNI->markUnused();
+      LI.removeSegment(I);
+      DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
+      PHIRemoved = true;
+    } else {
+      // This is a dead def. Make sure the instruction knows.
+      MachineInstr *MI = getInstructionFromIndex(Def);
+      assert(MI && "No instruction defining live value");
+      MI->addRegisterDead(LI.reg, TRI);
+      if (dead && MI->allDefsAreDead()) {
+        DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
+        dead->push_back(MI);
+      }
     }
+  }
+  return PHIRemoved;
+}
 
-    // VNI is live-in to MBB.
-    DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
-    NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
+  DEBUG(dbgs() << "Shrink: " << SR << '\n');
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "Can only shrink virtual registers");
+  // Find all the values used, including PHI kills.
+  ShrinkToUsesWorkList WorkList;
 
-    // Make sure VNI is live-out from the predecessors.
-    for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
-         PE = MBB->pred_end(); PI != PE; ++PI) {
-      if (!LiveOut.insert(*PI).second
+  // Visit all instructions reading Reg.
+  SlotIndex LastIdx;
+  for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+    MachineInstr *UseMI = MO.getParent();
+    if (UseMI->isDebugValue())
+      continue;
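// The loop continues below by filtering operands by lane mask: an operand
// affects a subrange only if the lanes of its subregister index overlap the
// subrange's lanes. A standalone sketch of that filter, with hypothetical
// Operand and LaneMask types:

#include <cstdint>
#include <vector>

using LaneMask = uint32_t;

struct Operand {
  LaneMask Lanes; // Lanes touched, from the operand's subregister index.
  int Pos;        // Position of the instruction.
};

// Keep only the operands whose lanes overlap the subrange; this is the
// `(SubRegMask & SR.LaneMask) == 0 -> skip` test that follows.
std::vector<int> usesTouchingSubRange(const std::vector<Operand> &Ops,
                                      LaneMask SubRangeMask) {
  std::vector<int> Positions;
  for (const Operand &O : Ops)
    if ((O.Lanes & SubRangeMask) != 0)
      Positions.push_back(O.Pos);
  return Positions;
}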
+    // Maybe the operand is for a subregister we don't care about.
+    unsigned SubReg = MO.getSubReg();
+    if (SubReg != 0) {
+      unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg);
+      if ((SubRegMask & SR.LaneMask) == 0)
         continue;
-      SlotIndex Stop = getMBBEndIdx(*PI);
-      assert(li->getVNInfoBefore(Stop) == VNI &&
-             "Wrong value out of predecessor");
-      WorkList.push_back(std::make_pair(Stop, VNI));
     }
+    // We only need to visit each instruction once.
+    SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+    if (Idx == LastIdx)
+      continue;
+    LastIdx = Idx;
+
+    LiveQueryResult LRQ = SR.Query(Idx);
+    VNInfo *VNI = LRQ.valueIn();
+    // For subranges it is possible that only undef values are left in that
+    // part of the subregister, so there is no real live range at the use.
+    if (!VNI)
+      continue;
+
+    // Special case: An early-clobber tied operand reads and writes the
+    // register one slot early.
+    if (VNInfo *DefVNI = LRQ.valueDefined())
+      Idx = DefVNI->def;
+
+    WorkList.push_back(std::make_pair(Idx, VNI));
   }
 
-  // Handle dead values.
-  bool CanSeparate = false;
-  computeDeadValues(li, NewLR, &CanSeparate, dead);
+  // Create a new live range with only minimal live segments per def.
+  LiveRange NewLR;
+  createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+  extendSegmentsToUses(NewLR, *Indexes, WorkList, SR);
 
-  // Move the trimmed segments back.
-  li->segments.swap(NewLR.segments);
-  DEBUG(dbgs() << "Shrunk: " << *li << '\n');
-  return CanSeparate;
-}
+  // Move the trimmed ranges back.
+  SR.segments.swap(NewLR.segments);
 
-void LiveIntervals::computeDeadValues(LiveInterval *li,
-                                      LiveRange &LR,
-                                      bool *CanSeparate,
-                                      SmallVectorImpl<MachineInstr*> *dead) {
-  for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
-       I != E; ++I) {
-    VNInfo *VNI = *I;
+  // Remove dead PHI value numbers.
+  for (auto VNI : SR.valnos) {
     if (VNI->isUnused())
       continue;
-    LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def);
-    assert(LRI != LR.end() && "Missing segment for PHI");
-    if (LRI->end != VNI->def.getDeadSlot())
+    const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
+    assert(Segment != nullptr && "Missing segment for VNI");
+    if (Segment->end != VNI->def.getDeadSlot())
      continue;
     if (VNI->isPHIDef()) {
       // This is a dead PHI. Remove it.
       VNI->markUnused();
-      LR.removeSegment(LRI->start, LRI->end);
+      SR.removeSegment(*Segment);
       DEBUG(dbgs() << "Dead PHI at " << VNI->def
                    << " may separate interval\n");
-      if (CanSeparate)
-        *CanSeparate = true;
-    } else {
-      // This is a dead def. Make sure the instruction knows.
- MachineInstr *MI = getInstructionFromIndex(VNI->def); - assert(MI && "No instruction defining live value"); - MI->addRegisterDead(li->reg, TRI); - if (dead && MI->allDefsAreDead()) { - DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); - dead->push_back(MI); - } } } + + DEBUG(dbgs() << "Shrunk: " << SR << '\n'); } void LiveIntervals::extendToIndices(LiveRange &LR, @@ -462,26 +572,25 @@ void LiveIntervals::extendToIndices(LiveRange &LR, LRCalc->extend(LR, Indices[i]); } -void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, +void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl<SlotIndex> *EndPoints) { - LiveQueryResult LRQ = LI->Query(Kill); - VNInfo *VNI = LRQ.valueOut(); + LiveQueryResult LRQ = LR.Query(Kill); + VNInfo *VNI = LRQ.valueOutOrDead(); if (!VNI) return; MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); - SlotIndex MBBStart, MBBEnd; - std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(KillMBB); // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { - LI->removeSegment(Kill, LRQ.endPoint()); + LR.removeSegment(Kill, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); return; } // VNI is live out of KillMBB. - LI->removeSegment(Kill, MBBEnd); + LR.removeSegment(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); // Find all blocks that are reachable from KillMBB without leaving VNI's live @@ -498,8 +607,9 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, MachineBasicBlock *MBB = *I; // Check if VNI is live in to MBB. + SlotIndex MBBStart, MBBEnd; std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveQueryResult LRQ = LI->Query(MBBStart); + LiveQueryResult LRQ = LR.Query(MBBStart); if (LRQ.valueIn() != VNI) { // This block isn't part of the VNI segment. Prune the search. I.skipChildren(); @@ -508,14 +618,14 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // Prune the search if VNI is killed in MBB. if (LRQ.endPoint() < MBBEnd) { - LI->removeSegment(MBBStart, LRQ.endPoint()); + LR.removeSegment(MBBStart, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); I.skipChildren(); continue; } // VNI is live through MBB. - LI->removeSegment(MBBStart, MBBEnd); + LR.removeSegment(MBBStart, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); ++I; } @@ -528,14 +638,17 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. - SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU; + SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU; + // Keep track of subregister ranges. + SmallVector<std::pair<const LiveInterval::SubRange*, + LiveRange::const_iterator>, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = &getInterval(Reg); - if (LI->empty()) + const LiveInterval &LI = getInterval(Reg); + if (LI.empty()) continue; // Find the regunit intervals for the assigned register. 
They may overlap
@@ -543,15 +656,22 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
     RU.clear();
     for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
          ++Units) {
-      LiveRange &RURanges = getRegUnit(*Units);
-      if (RURanges.empty())
+      const LiveRange &RURange = getRegUnit(*Units);
+      if (RURange.empty())
         continue;
-      RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end)));
+      RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
+    }
+
+    if (MRI->tracksSubRegLiveness()) {
+      SRs.clear();
+      for (const LiveInterval::SubRange &SR : LI.subranges()) {
+        SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
+      }
     }
 
     // Every instruction that kills Reg corresponds to a segment range end
     // point.
-    for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+    for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
          ++RI) {
       // A block index indicates an MBB edge.
       if (RI->end.isBlock())
@@ -568,23 +688,80 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
       //   BAR %EAX<kill>
       //
       // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
-      bool CancelKill = false;
-      for (unsigned u = 0, e = RU.size(); u != e; ++u) {
-        LiveRange &RRanges = *RU[u].first;
-        LiveRange::iterator &I = RU[u].second;
-        if (I == RRanges.end())
+      for (auto &RUP : RU) {
+        const LiveRange &RURange = *RUP.first;
+        LiveRange::const_iterator &I = RUP.second;
+        if (I == RURange.end())
          continue;
-        I = RRanges.advanceTo(I, RI->end);
-        if (I == RRanges.end() || I->start >= RI->end)
+        I = RURange.advanceTo(I, RI->end);
+        if (I == RURange.end() || I->start >= RI->end)
          continue;
        // I is overlapping RI.
-        CancelKill = true;
-        break;
+        goto CancelKill;
+      }
+
+      if (MRI->tracksSubRegLiveness()) {
+        // When reading a partially undefined value we must not add a kill
+        // flag. The regalloc might have used the undef lane for something
+        // else. Example:
+        //     %vreg1 = ...                  ; R32: %vreg1
+        //     %vreg2:high16 = ...           ; R64: %vreg2
+        //        = read %vreg2<kill>        ; R64: %vreg2
+        //        = read %vreg1              ; R32: %vreg1
+        // The <kill> flag is correct for %vreg2, but the register allocator
+        // may assign R0L to %vreg1 and R0 to %vreg2, because the low 32 bits
+        // of R0 are actually never written by %vreg2. After assignment, the
+        // <kill> flag at the read instruction is invalid.
+        unsigned DefinedLanesMask;
+        if (!SRs.empty()) {
+          // Compute a mask of lanes that are defined.
+          DefinedLanesMask = 0;
+          for (auto &SRP : SRs) {
+            const LiveInterval::SubRange &SR = *SRP.first;
+            LiveRange::const_iterator &I = SRP.second;
+            if (I == SR.end())
+              continue;
+            I = SR.advanceTo(I, RI->end);
+            if (I == SR.end() || I->start >= RI->end)
+              continue;
+            // I is overlapping RI.
+            DefinedLanesMask |= SR.LaneMask;
+          }
+        } else
+          DefinedLanesMask = ~0u;
+
+        bool IsFullWrite = false;
+        for (const MachineOperand &MO : MI->operands()) {
+          if (!MO.isReg() || MO.getReg() != Reg)
+            continue;
+          if (MO.isUse()) {
+            // Reading any undefined lanes?
+            unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+            if ((UseMask & ~DefinedLanesMask) != 0)
+              goto CancelKill;
+          } else if (MO.getSubReg() == 0) {
+            // Writing to the full register?
+            assert(MO.isDef());
+            IsFullWrite = true;
+          }
+        }
+
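// The decision made with DefinedLanesMask above boils down to a single mask
// test: a use may keep its <kill> flag only if every lane it reads is defined
// by some subrange. A standalone sketch of that test with a hypothetical
// LaneMask type:

#include <cassert>
#include <cstdint>

using LaneMask = uint32_t;

// Reading an undefined lane means the physreg lane may hold an unrelated
// value after allocation, so the flag must be dropped.
bool mayKeepKillFlag(LaneMask UseMask, LaneMask DefinedLanesMask) {
  return (UseMask & ~DefinedLanesMask) == 0;
}

int main() {
  assert(mayKeepKillFlag(0x1, 0x1));   // All read lanes are defined.
  assert(!mayKeepKillFlag(0x3, 0x1));  // Reads a lane nothing defines.
  return 0;
}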
+        // If an instruction writes to a subregister, a new segment starts in
+        // the LiveInterval. But as this only overwrites part of the register,
+        // adding kill flags here is not correct after registers have been
+        // assigned.
+        if (!IsFullWrite) {
+          // The next segment has to be adjacent in the subregister write case.
+          LiveRange::const_iterator N = std::next(RI);
+          if (N != LI.end() && N->start == RI->end)
+            goto CancelKill;
+        }
       }
-      if (CancelKill)
-        MI->clearRegisterKills(Reg, nullptr);
-      else
-        MI->addRegisterKilled(Reg, nullptr);
+
+      MI->addRegisterKilled(Reg, nullptr);
+      continue;
+CancelKill:
+      MI->clearRegisterKills(Reg, nullptr);
     }
   }
 }
@@ -615,9 +792,7 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
 
 bool LiveIntervals::hasPHIKill(const LiveInterval &LI,
                                const VNInfo *VNI) const {
-  for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
-       I != E; ++I) {
-    const VNInfo *PHI = *I;
+  for (const VNInfo *PHI : LI.valnos) {
     if (PHI->isUnused() || !PHI->isPHIDef())
       continue;
     const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
@@ -767,7 +942,16 @@ public:
         continue;
       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
         LiveInterval &LI = LIS.getInterval(Reg);
-        updateRange(LI, Reg);
+        if (LI.hasSubRanges()) {
+          unsigned SubReg = MO->getSubReg();
+          unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+          for (LiveInterval::SubRange &S : LI.subranges()) {
+            if ((S.LaneMask & LaneMask) == 0)
+              continue;
+            updateRange(S, Reg, S.LaneMask);
+          }
+        }
+        updateRange(LI, Reg, 0);
         continue;
       }
 
@@ -775,7 +959,7 @@ public:
       // precomputed live range.
       for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
         if (LiveRange *LR = getRegUnitLI(*Units))
-          updateRange(*LR, *Units);
+          updateRange(*LR, *Units, 0);
     }
     if (hasRegMask)
       updateRegMaskSlots();
@@ -784,21 +968,24 @@ public:
 private:
   /// Update a single live range, assuming an instruction has been moved from
   /// OldIdx to NewIdx.
-  void updateRange(LiveRange &LR, unsigned Reg) {
+  void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
     if (!Updated.insert(&LR).second)
       return;
     DEBUG({
      dbgs() << "     ";
-      if (TargetRegisterInfo::isVirtualRegister(Reg))
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        dbgs() << PrintReg(Reg);
-      else
+        if (LaneMask != 0)
+          dbgs() << format(" L%04X", LaneMask);
+      } else {
        dbgs() << PrintRegUnit(Reg, &TRI);
+      }
      dbgs() << ":\t" << LR << '\n';
    });
    if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
      handleMoveDown(LR);
    else
-      handleMoveUp(LR, Reg);
+      handleMoveUp(LR, Reg, LaneMask);
    DEBUG(dbgs() << "        -->\t" << LR << '\n');
    LR.verify();
  }
@@ -911,7 +1098,7 @@ private:
  /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
  /// OldIdx.
  ///
-  void handleMoveUp(LiveRange &LR, unsigned Reg) {
+  void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) {
    // First look for a kill at OldIdx.
    LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
    LiveRange::iterator E = LR.end();
@@ -932,7 +1119,7 @@ private:
      if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
        // No def, search for the new kill.
        // This can never be an early clobber kill since there is no def.
-        std::prev(I)->end = findLastUseBefore(Reg).getRegSlot();
+        std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot();
        return;
      }
    }
@@ -988,15 +1175,17 @@ private:
  }

  // Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg) { + SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI.use_instr_nodbg_begin(Reg), - UE = MRI.use_instr_nodbg_end(); - UI != UE; ++UI) { - const MachineInstr* MI = &*UI; + for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0 && LaneMask != 0 + && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0) + continue; + + const MachineInstr *MI = MO.getParent(); SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); if (InstSlot > LastUse && InstSlot < OldIdx) LastUse = InstSlot; @@ -1062,6 +1251,94 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, HME.updateAllRanges(MI); } +void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, + const MachineBasicBlock::iterator End, + const SlotIndex endIdx, + LiveRange &LR, const unsigned Reg, + const unsigned LaneMask) { + LiveInterval::iterator LII = LR.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LR.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + unsigned SubReg = MO.getSubReg(); + unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg); + if ((Mask & LaneMask) == 0) + continue; + + if (MO.isDef()) { + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LR.begin()) + prevStart = std::prev(LII)->start; + + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LR.removeSegment(*LII, true); + if (prevStart.isValid()) + LII = LR.find(prevStart); + else + LII = LR.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LR.addSegment(S); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LR.addSegment(S); + } + + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. 
+ if (!isEndValid && !LII->end.isBlock()) + LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); + } + } + } +} + void LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, @@ -1107,83 +1384,31 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!LI.hasAtLeastOneValue()) continue; - LiveInterval::iterator LII = LI.find(endIdx); - SlotIndex lastUseIdx; - if (LII != LI.end() && LII->start < endIdx) - lastUseIdx = LII->end; - else - --LII; - - for (MachineBasicBlock::iterator I = End; I != Begin;) { - --I; - MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - - SlotIndex instrIdx = getInstructionIndex(MI); - bool isStartValid = getInstructionFromIndex(LII->start); - bool isEndValid = getInstructionFromIndex(LII->end); - - // FIXME: This doesn't currently handle early-clobber or multiple removed - // defs inside of the region to repair. - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - const MachineOperand &MO = *OI; - if (!MO.isReg() || MO.getReg() != Reg) - continue; + for (LiveInterval::SubRange &S : LI.subranges()) { + repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask); + } + repairOldRegInRange(Begin, End, endIdx, LI, Reg); + } +} - if (MO.isDef()) { - if (!isStartValid) { - if (LII->end.isDead()) { - SlotIndex prevStart; - if (LII != LI.begin()) - prevStart = std::prev(LII)->start; - - // FIXME: This could be more efficient if there was a - // removeSegment method that returned an iterator. - LI.removeSegment(*LII, true); - if (prevStart.isValid()) - LII = LI.find(prevStart); - else - LII = LI.begin(); - } else { - LII->start = instrIdx.getRegSlot(); - LII->valno->def = instrIdx.getRegSlot(); - if (MO.getSubReg() && !MO.isUndef()) - lastUseIdx = instrIdx.getRegSlot(); - else - lastUseIdx = SlotIndex(); - continue; - } - } +void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = getCachedRegUnit(*Units)) + if (VNInfo *VNI = LR->getVNInfoAt(Pos)) + LR->removeValNo(VNI); + } +} - if (!lastUseIdx.isValid()) { - VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), - VNInfoAllocator); - LiveRange::Segment S(instrIdx.getRegSlot(), - instrIdx.getDeadSlot(), VNI); - LII = LI.addSegment(S); - } else if (LII->start != instrIdx.getRegSlot()) { - VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), - VNInfoAllocator); - LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); - LII = LI.addSegment(S); - } +void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + VNInfo *VNI = LI.getVNInfoAt(Pos); + if (VNI == nullptr) + return; + LI.removeValNo(VNI); - if (MO.getSubReg() && !MO.isUndef()) - lastUseIdx = instrIdx.getRegSlot(); - else - lastUseIdx = SlotIndex(); - } else if (MO.isUse()) { - // FIXME: This should probably be handled outside of this branch, - // either as part of the def case (for defs inside of the region) or - // after the loop over the region. - if (!isEndValid && !LII->end.isBlock()) - LII->end = instrIdx.getRegSlot(); - if (!lastUseIdx.isValid()) - lastUseIdx = instrIdx.getRegSlot(); - } - } - } + // Also remove the value in subranges. 
+ for (LiveInterval::SubRange &S : LI.subranges()) { + if (VNInfo *SVNI = S.getVNInfoAt(Pos)) + S.removeValNo(SVNI); } + LI.removeEmptySubRanges(); } diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index d81221b..025d99c 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -26,14 +26,14 @@ using namespace llvm; // Merge a LiveInterval's segments. Guarantee no overlaps. -void LiveIntervalUnion::unify(LiveInterval &VirtReg) { - if (VirtReg.empty()) +void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { + if (Range.empty()) return; ++Tag; // Insert each of the virtual register's live segments into the map. - LiveInterval::iterator RegPos = VirtReg.begin(); - LiveInterval::iterator RegEnd = VirtReg.end(); + LiveRange::const_iterator RegPos = Range.begin(); + LiveRange::const_iterator RegEnd = Range.end(); SegmentIter SegPos = Segments.find(RegPos->start); while (SegPos.valid()) { @@ -53,14 +53,14 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { } // Remove a live virtual register's segments from this union. -void LiveIntervalUnion::extract(LiveInterval &VirtReg) { - if (VirtReg.empty()) +void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) { + if (Range.empty()) return; ++Tag; // Remove each of the virtual register's live segments from the map. - LiveInterval::iterator RegPos = VirtReg.begin(); - LiveInterval::iterator RegEnd = VirtReg.end(); + LiveRange::const_iterator RegPos = Range.begin(); + LiveRange::const_iterator RegEnd = Range.end(); SegmentIter SegPos = Segments.find(RegPos->start); for (;;) { @@ -70,7 +70,7 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) { return; // Skip all segments that may have been coalesced. - RegPos = VirtReg.advanceTo(RegPos, SegPos.start()); + RegPos = Range.advanceTo(RegPos, SegPos.start()); if (RegPos == RegEnd) return; diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index a558e14..d804b39 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -19,6 +19,13 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" +void LiveRangeCalc::resetLiveOutMap() { + unsigned NumBlocks = MF->getNumBlockIDs(); + Seen.clear(); + Seen.resize(NumBlocks); + Map.resize(NumBlocks); +} + void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, @@ -28,126 +35,207 @@ void LiveRangeCalc::reset(const MachineFunction *mf, Indexes = SI; DomTree = MDT; Alloc = VNIA; - - unsigned N = MF->getNumBlockIDs(); - Seen.clear(); - Seen.resize(N); - LiveOut.resize(N); + resetLiveOutMap(); LiveIn.clear(); } -void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { +static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, + LiveRange &LR, const MachineOperand &MO) { + const MachineInstr *MI = MO.getParent(); + SlotIndex DefIdx = + Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); + + // Create the def in LR. This may find an existing def. + LR.createDeadDef(DefIdx, Alloc); +} + +void LiveRangeCalc::calculate(LiveInterval &LI) { assert(MRI && Indexes && "call reset() first"); + // Step 1: Create minimal live segments for every definition of Reg. // Visit all def operands. If the same instruction has multiple defs of Reg, - // LR.createDeadDef() will deduplicate. - for (MachineOperand &MO : MRI->def_operands(Reg)) { - const MachineInstr *MI = MO.getParent(); - // Find the corresponding slot index. 
-    SlotIndex Idx;
-    if (MI->isPHI())
-      // PHI defs begin at the basic block start index.
-      Idx = Indexes->getMBBStartIdx(MI->getParent());
-    else
-      // Instructions are either normal 'r', or early clobber 'e'.
-      Idx = Indexes->getInstructionIndex(MI)
-        .getRegSlot(MO.isEarlyClobber());
+  // createDeadDef() will deduplicate.
+  const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+  unsigned Reg = LI.reg;
+  for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+    if (!MO.isDef() && !MO.readsReg())
+      continue;
 
-    // Create the def in LR. This may find an existing def.
-    LR.createDeadDef(Idx, *Alloc);
+    unsigned SubReg = MO.getSubReg();
+    if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+      unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+                                  : MRI->getMaxLaneMaskForVReg(Reg);
+
+      // If this is the first time we see a subregister def, initialize
+      // subranges by creating a copy of the main range.
+      if (!LI.hasSubRanges() && !LI.empty()) {
+        unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+        LI.createSubRangeFrom(*Alloc, ClassMask, LI);
+      }
+
+      for (LiveInterval::SubRange &S : LI.subranges()) {
+        // A mask for subregs common to the existing subrange and current def.
+        unsigned Common = S.LaneMask & Mask;
+        if (Common == 0)
+          continue;
+        // A mask for subregs covered by the subrange but not the current def.
+        unsigned LRest = S.LaneMask & ~Mask;
+        LiveInterval::SubRange *CommonRange;
+        if (LRest != 0) {
+          // Split current subrange into Common and LRest ranges.
+          S.LaneMask = LRest;
+          CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
+        } else {
+          assert(Common == S.LaneMask);
+          CommonRange = &S;
+        }
+        if (MO.isDef())
+          createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
+        Mask &= ~Common;
+      }
+      // Create a new SubRange for subregs we did not cover yet.
+      if (Mask != 0) {
+        LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
+        if (MO.isDef())
+          createDeadDef(*Indexes, *Alloc, *NewRange, MO);
+      }
+    }
+
+    // Create the def in the main live range. We do not have to do this if
+    // subranges are tracked, as we recreate the main range later in that case.
+    if (MO.isDef() && !LI.hasSubRanges())
+      createDeadDef(*Indexes, *Alloc, LI, MO);
+  }
+
+  // We may have created empty live ranges for partially undefined uses; we
+  // can't keep them because we won't find defs in them later.
+  LI.removeEmptySubRanges();
+
+  // Step 2: Extend live segments to all uses, constructing SSA form as
+  // necessary.
+  if (LI.hasSubRanges()) {
+    for (LiveInterval::SubRange &S : LI.subranges()) {
+      resetLiveOutMap();
+      extendToUses(S, Reg, S.LaneMask);
+    }
+    LI.clear();
+    LI.constructMainRangeFromSubranges(*Indexes, *Alloc);
+  } else {
+    resetLiveOutMap();
+    extendToUses(LI, Reg, ~0u);
   }
 }
 
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) {
+void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
   assert(MRI && Indexes && "call reset() first");
 
+  // Visit all def operands. If the same instruction has multiple defs of Reg,
+  // LR.createDeadDef() will deduplicate.
+  for (MachineOperand &MO : MRI->def_operands(Reg))
+    createDeadDef(*Indexes, *Alloc, LR, MO);
+}
+
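// The Common/LRest handling in calculate() above keeps the subrange lane
// masks a partition: each def either lines up with the existing subranges
// exactly or forces a split. A standalone model over plain masks, with a
// hypothetical SubRange type whose segments are omitted:

#include <cstdint>
#include <vector>

using LaneMask = uint32_t;

struct SubRange {
  LaneMask Mask; // The segments the real subrange carries are omitted.
};

// Refine the lane partition for a def covering DefMask: subranges partially
// covered by the def are split into a covered ("common") and an uncovered
// ("rest") part, and lanes no subrange covers yet get a fresh entry.
void refineForDef(std::vector<SubRange> &SubRanges, LaneMask DefMask) {
  LaneMask Remaining = DefMask;
  const size_t N = SubRanges.size(); // Only visit pre-existing entries.
  for (size_t I = 0; I != N && Remaining != 0; ++I) {
    LaneMask Common = SubRanges[I].Mask & Remaining;
    if (Common == 0)
      continue;
    LaneMask Rest = SubRanges[I].Mask & ~Remaining;
    if (Rest != 0) {
      SubRanges[I].Mask = Rest;       // Shrink to the uncovered lanes...
      SubRanges.push_back({Common});  // ...and split off the covered ones.
    }
    Remaining &= ~Common;
  }
  if (Remaining != 0)
    SubRanges.push_back({Remaining}); // Lanes defined for the first time.
}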
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
   // Visit all operands that read Reg. This may include partial defs.
+  const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
   for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
     // Clear all kill flags. They will be reinserted after register allocation
     // by LiveIntervalAnalysis::addKillFlags().
     if (MO.isUse())
       MO.setIsKill(false);
+    else {
+      // We only care about uses, but on the main range (mask ~0u) this
+      // includes the "virtual" reads happening for subregister defs.
+      if (Mask != ~0u)
+        continue;
+    }
+
     if (!MO.readsReg())
       continue;
-    // MI is reading Reg. We may have visited MI before if it happens to be
-    // reading Reg multiple times. That is OK, extend() is idempotent.
+    unsigned SubReg = MO.getSubReg();
+    if (SubReg != 0) {
+      unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+      // Ignore uses not covering the current subrange.
+      if ((SubRegMask & Mask) == 0)
+        continue;
+    }
+
+    // Determine the actual place of the use.
     const MachineInstr *MI = MO.getParent();
     unsigned OpNo = (&MO - &MI->getOperand(0));
-
-    // Find the SlotIndex being read.
-    SlotIndex Idx;
+    SlotIndex UseIdx;
     if (MI->isPHI()) {
       assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
-      // PHI operands are paired: (Reg, PredMBB).
-      // Extend the live range to be live-out from PredMBB.
-      Idx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
+      // The actual place where a phi operand is used is the end of the pred
+      // MBB. PHI operands are paired: (Reg, PredMBB).
+      UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
     } else {
-      // This is a normal instruction.
-      Idx = Indexes->getInstructionIndex(MI).getRegSlot();
       // Check for early-clobber redefs.
+      bool isEarlyClobber = false;
       unsigned DefIdx;
-      if (MO.isDef()) {
-        if (MO.isEarlyClobber())
-          Idx = Idx.getRegSlot(true);
-      } else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
+      if (MO.isDef())
+        isEarlyClobber = MO.isEarlyClobber();
+      else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
         // FIXME: This would be a lot easier if tied early-clobber uses also
         // had an early-clobber flag.
-        if (MI->getOperand(DefIdx).isEarlyClobber())
-          Idx = Idx.getRegSlot(true);
+        isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
       }
+      UseIdx = Indexes->getInstructionIndex(MI).getRegSlot(isEarlyClobber);
     }
-    extend(LR, Idx, Reg);
+
+    // MI is reading Reg. We may have visited MI before if it happens to be
+    // reading Reg multiple times. That is OK, extend() is idempotent.
+    extend(LR, UseIdx, Reg);
   }
 }
 
 
-// Transfer information from the LiveIn vector to the live ranges.
-void LiveRangeCalc::updateLiveIns() {
+void LiveRangeCalc::updateFromLiveIns() {
   LiveRangeUpdater Updater;
-  for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
-       E = LiveIn.end(); I != E; ++I) {
-    if (!I->DomNode)
+  for (const LiveInBlock &I : LiveIn) {
+    if (!I.DomNode)
       continue;
-    MachineBasicBlock *MBB = I->DomNode->getBlock();
-    assert(I->Value && "No live-in value found");
+    MachineBasicBlock *MBB = I.DomNode->getBlock();
+    assert(I.Value && "No live-in value found");
     SlotIndex Start, End;
     std::tie(Start, End) = Indexes->getMBBRange(MBB);
 
-    if (I->Kill.isValid())
+    if (I.Kill.isValid())
       // Value is killed inside this block.
-      End = I->Kill;
+      End = I.Kill;
     else {
       // The value is live-through, update LiveOut as well.
       // Defer the Domtree lookup until it is needed.
       assert(Seen.test(MBB->getNumber()));
-      LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)nullptr);
+      Map[MBB] = LiveOutPair(I.Value, nullptr);
     }
 
-    Updater.setDest(&I->LR);
-    Updater.add(Start, End, I->Value);
+    Updater.setDest(&I.LR);
+    Updater.add(Start, End, I.Value);
   }
   LiveIn.clear();
 }
 
 
-void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) {
-  assert(Kill.isValid() && "Invalid SlotIndex");
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
+  assert(Use.isValid() && "Invalid SlotIndex");
   assert(Indexes && "Missing SlotIndexes");
   assert(DomTree && "Missing dominator tree");
 
-  MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
-  assert(KillMBB && "No MBB at Kill");
+  MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot());
+  assert(UseMBB && "No MBB at Use");
 
   // Is there a def in the same MBB we can extend?
-  if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
+  if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use))
     return;
 
-  // Find the single reaching def, or determine if Kill is jointly dominated by
+  // Find the single reaching def, or determine if Use is jointly dominated by
   // multiple values, and we may need to create even more phi-defs to preserve
   // VNInfo SSA form. Perform a search for all predecessor blocks where we
   // know the dominating VNInfo.
-  if (findReachingDefs(LR, *KillMBB, Kill, PhysReg))
+  if (findReachingDefs(LR, *UseMBB, Use, PhysReg))
     return;
 
   // When there were multiple different values, we may need new PHIs.
@@ -162,16 +250,16 @@ void LiveRangeCalc::calculateValues() {
   assert(Indexes && "Missing SlotIndexes");
   assert(DomTree && "Missing dominator tree");
   updateSSA();
-  updateLiveIns();
+  updateFromLiveIns();
 }
 
 
-bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
-                                     SlotIndex Kill, unsigned PhysReg) {
-  unsigned KillMBBNum = KillMBB.getNumber();
+bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
+                                     SlotIndex Use, unsigned PhysReg) {
+  unsigned UseMBBNum = UseMBB.getNumber();
 
   // Block numbers where LR should be live-in.
-  SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
+  SmallVector<unsigned, 16> WorkList(1, UseMBBNum);
 
   // Remember if we have seen more than one value.
   bool UniqueVNI = true;
@@ -202,7 +290,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
 
       // Is this a known live-out block?
       if (Seen.test(Pred->getNumber())) {
-        if (VNInfo *VNI = LiveOut[Pred].first) {
+        if (VNInfo *VNI = Map[Pred].first) {
           if (TheVNI && TheVNI != VNI)
            UniqueVNI = false;
          TheVNI = VNI;
@@ -225,11 +313,11 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
       }
 
       // No, we need a live-in value for Pred as well.
-      if (Pred != &KillMBB)
+      if (Pred != &UseMBB)
         WorkList.push_back(Pred->getNumber());
       else
-        // Loopback to KillMBB, so value is really live through.
-        Kill = SlotIndex();
+        // Loop back to UseMBB, so the value is really live through.
+        Use = SlotIndex();
     }
   }
 
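// findReachingDefs() above walks predecessors until every path hits a block
// with a known live-out value: one unique value lets the caller add plain
// segments, while several values force phi creation in updateSSA(). A
// simplified standalone model, with live-out values precomputed as plain
// ints and -1 meaning "unknown" (or, on return, "multiple values"):

#include <vector>

struct Block {
  int LiveOutValue = -1; // Known live-out value number, -1 if unknown.
  std::vector<Block *> Preds;
  bool Seen = false;
};

// Search backwards from a use until every path reaches a block with a known
// live-out value. Returns that value if it is unique, or -1 if several
// different values reach the use (the caller would then need phi-defs).
int findUniqueReachingValue(Block &UseBlock) {
  std::vector<Block *> WorkList{&UseBlock};
  UseBlock.Seen = true;
  int TheValue = -1;
  while (!WorkList.empty()) {
    Block *B = WorkList.back();
    WorkList.pop_back();
    for (Block *Pred : B->Preds) {
      if (Pred->Seen)
        continue;
      Pred->Seen = true;
      if (Pred->LiveOutValue >= 0) {
        if (TheValue >= 0 && TheValue != Pred->LiveOutValue)
          return -1;                     // Joint domination by two values.
        TheValue = Pred->LiveOutValue;   // Known value: stop walking here.
      } else {
        WorkList.push_back(Pred);        // Unknown: keep searching upward.
      }
    }
  }
  return TheValue;
}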
+ if (*I == UseMBBNum && Use.isValid()) + End = Use; else - LiveOut[MF->getBlockNumbered(*I)] = - LiveOutPair(TheVNI, nullptr); + Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); Updater.add(Start, End, TheVNI); } return true; @@ -265,8 +352,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); addLiveInBlock(LR, DomTree->getNode(MBB)); - if (MBB == &KillMBB) - LiveIn.back().Kill = Kill; + if (MBB == &UseMBB) + LiveIn.back().Kill = Use; } return false; @@ -285,9 +372,8 @@ void LiveRangeCalc::updateSSA() { Changes = 0; // Propagate live-out values down the dominator tree, inserting phi-defs // when necessary. - for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), - E = LiveIn.end(); I != E; ++I) { - MachineDomTreeNode *Node = I->DomNode; + for (LiveInBlock &I : LiveIn) { + MachineDomTreeNode *Node = I.DomNode; // Skip block if the live-in value has already been determined. if (!Node) continue; @@ -303,16 +389,16 @@ void LiveRangeCalc::updateSSA() { // immediate dominator. Check if any of them have live-out values that are // properly dominated by IDom. If so, we need a phi-def here. if (!needPHI) { - IDomValue = LiveOut[IDom->getBlock()]; + IDomValue = Map[IDom->getBlock()]; // Cache the DomTree node that defined the value. if (IDomValue.first && !IDomValue.second) - LiveOut[IDom->getBlock()].second = IDomValue.second = + Map[IDom->getBlock()].second = IDomValue.second = DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def)); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - LiveOutPair &Value = LiveOut[*PI]; + LiveOutPair &Value = Map[*PI]; if (!Value.first || Value.first == IDomValue.first) continue; @@ -334,7 +420,7 @@ void LiveRangeCalc::updateSSA() { // The value may be live-through even if Kill is set, as can happen when // we are called from extendRange. In that case LiveOutSeen is true, and // LiveOut indicates a foreign or missing value. - LiveOutPair &LOP = LiveOut[MBB]; + LiveOutPair &LOP = Map[MBB]; // Create a phi-def if required. if (needPHI) { @@ -342,25 +428,25 @@ void LiveRangeCalc::updateSSA() { assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(MBB); - LiveRange &LR = I->LR; + LiveRange &LR = I.LR; VNInfo *VNI = LR.getNextValue(Start, *Alloc); - I->Value = VNI; + I.Value = VNI; // This block is done, we know the final value. - I->DomNode = nullptr; + I.DomNode = nullptr; - // Add liveness since updateLiveIns now skips this node. - if (I->Kill.isValid()) - LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI)); + // Add liveness since updateFromLiveIns now skips this node. + if (I.Kill.isValid()) + LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); else { LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { // No phi-def here. Remember incoming value. - I->Value = IDomValue.first; + I.Value = IDomValue.first; // If the IDomValue is killed in the block, don't propagate through. 
- if (I->Kill.isValid()) + if (I.Kill.isValid()) continue; // Propagate IDomValue if it isn't killed: diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 345d6c4..90bf971 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -40,12 +40,6 @@ class LiveRangeCalc { MachineDominatorTree *DomTree; VNInfo::Allocator *Alloc; - /// Seen - Bit vector of active entries in LiveOut, also used as a visited - /// set by findReachingDefs. One entry per basic block, indexed by block - /// number. This is kept as a separate bit vector because it can be cleared - /// quickly when switching live ranges. - BitVector Seen; - /// LiveOutPair - A value and the block that defined it. The domtree node is /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; @@ -53,8 +47,14 @@ class LiveRangeCalc { /// LiveOutMap - Map basic blocks to the value leaving the block. typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; - /// LiveOut - Map each basic block where a live range is live out to the - /// live-out value and its defining block. + /// Bit vector of active entries in LiveOut, also used as a visited set by + /// findReachingDefs. One entry per basic block, indexed by block number. + /// This is kept as a separate bit vector because it can be cleared quickly + /// when switching live ranges. + BitVector Seen; + + /// Map each basic block where a live range is live out to the live-out value + /// and its defining block. /// /// For every basic block, MBB, one of these conditions shall be true: /// @@ -70,7 +70,7 @@ class LiveRangeCalc { /// /// The map can be shared by multiple live ranges as long as no two are /// live-out of the same block. - LiveOutMap LiveOut; + LiveOutMap Map; /// LiveInBlock - Information about a basic block where a live range is known /// to be live-in, but the value has not yet been determined. @@ -101,17 +101,17 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector<LiveInBlock, 16> LiveIn; - /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set - /// of defs that can reach it. + /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can + /// reach it. /// - /// If only one def can reach Kill, all paths from the def to kill are added - /// to LI, and the function returns true. + /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB + /// are added to @p LR, and the function returns true. /// - /// If multiple values can reach Kill, the blocks that need LI to be live in - /// are added to the LiveIn array, and the function returns false. + /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be + /// live in are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested @@ -121,8 +121,18 @@ class LiveRangeCalc { /// blocks. No values are read from the live ranges. void updateSSA(); - /// Add liveness as specified in the LiveIn vector. - void updateLiveIns(); + /// Transfer information from the LiveIn vector to the live ranges and update + /// the given @p LiveOuts. 
+ void updateFromLiveIns(); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask); + + /// Reset Map and Seen fields. + void resetLiveOutMap(); public: LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr), @@ -152,37 +162,33 @@ public: // Modify existing live ranges. // - /// extend - Extend the live range of LI to reach Kill. + /// Extend the live range of @p LR to reach @p Use. /// - /// The existing values in LI must be live so they jointly dominate Kill. If - /// Kill is not dominated by a single existing value, PHI-defs are inserted - /// as required to preserve SSA form. If Kill is known to be dominated by a - /// single existing value, Alloc may be null. + /// The existing values in @p LR must be live so they jointly dominate @p Use. + /// If @p Use is not dominated by a single existing value, PHI-defs are + /// inserted as required to preserve SSA form. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding /// minimal live range. void createDeadDefs(LiveRange &LR, unsigned Reg); - /// createDeadDefs - Create a dead def in LI for every def of LI->reg. - void createDeadDefs(LiveInterval &LI) { - createDeadDefs(LI, LI.reg); - } - - /// extendToUses - Extend the live range of LI to reach all uses of Reg. + /// Extend the live range of @p LR to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg); - - /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. - void extendToUses(LiveInterval &LI) { - extendToUses(LI, LI.reg); + void extendToUses(LiveRange &LR, unsigned PhysReg) { + extendToUses(LR, PhysReg, ~0u); } + /// Calculates liveness for the register specified in live interval @p LI. + /// Creates subregister live ranges as needed if subreg liveness tracking is + /// enabled. + void calculate(LiveInterval &LI); + //===--------------------------------------------------------------------===// // Low-level interface. //===--------------------------------------------------------------------===// @@ -204,7 +210,7 @@ public: /// addLiveInBlock(). void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { Seen.set(MBB->getNumber()); - LiveOut[MBB] = LiveOutPair(VNI, nullptr); + Map[MBB] = LiveOutPair(VNI, nullptr); } /// addLiveInBlock - Add a block with an unknown live-in value. 
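Before the next file, a note on the extend() contract documented above: there is a cheap fast path when the use's own block already carries liveness that reaches it, and only otherwise does the calculator search predecessors and possibly insert PHI-defs. A toy model of that fast path with a hand-rolled segment vector instead of LLVM's LiveRange; every name here is hypothetical:

    #include <cstdio>
    #include <vector>

    struct Segment { unsigned Start, End; }; // half-open [Start, End)

    // If some segment starts inside [BlockStart, Use), grow it up to Use and
    // report success; otherwise the caller must search for reaching defs.
    static bool extendInBlock(std::vector<Segment> &LR, unsigned BlockStart,
                              unsigned Use) {
      for (Segment &S : LR)
        if (S.Start >= BlockStart && S.Start < Use) {
          if (S.End < Use)
            S.End = Use;
          return true;
        }
      return false;
    }

    int main() {
      std::vector<Segment> LR = {{4, 8}}; // a def at 4, previously dead at 8
      bool Ok = extendInBlock(LR, 0, 20); // a use at index 20, same block
      std::printf("%d [%u,%u)\n", Ok, LR[0].Start, LR[0].End); // 1 [4,20)
      return 0;
    }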
This diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index a0fb712..0edc897 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -60,9 +60,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, } void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { - for (LiveInterval::vni_iterator I = getParent().vni_begin(), - E = getParent().vni_end(); I != E; ++I) { - VNInfo *VNI = *I; + for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); @@ -258,15 +256,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Check if MI reads any unreserved physregs. if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) ReadsPhysRegs = true; - else if (MOI->isDef()) { - for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); - Units.isValid(); ++Units) { - if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { - if (VNInfo *VNI = LR->getVNInfoAt(Idx)) - LR->removeValNo(VNI); - } - } - } + else if (MOI->isDef()) + LIS.removePhysRegDefAt(Reg, Idx); continue; } LiveInterval &LI = LIS.getInterval(Reg); @@ -282,13 +273,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Remove defined value. if (MOI->isDef()) { - if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); - LI.removeValNo(VNI); - if (LI.empty()) - RegsToErase.push_back(Reg); - } + if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr) + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LIS.removeVRegDefAt(LI, Idx); + if (LI.empty()) + RegsToErase.push_back(Reg); } } @@ -410,7 +399,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); - if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) + if (MRI.recomputeRegClass(LI.reg)) DEBUG({ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); dbgs() << "Inflated " << PrintReg(LI.reg) << " to " diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index a8cae08..154ce6f 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -71,16 +72,44 @@ void LiveRegMatrix::releaseMemory() { } } +template<typename Callable> +bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, + unsigned PhysReg, Callable Func) { + if (VRegInterval.hasSubRanges()) { + for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + unsigned Unit = (*Units).first; + unsigned Mask = (*Units).second; + for (LiveInterval::SubRange &S : VRegInterval.subranges()) { + if (S.LaneMask & Mask) { + if (Func(Unit, S)) + return true; + break; + } + } + } + } else { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (Func(*Units, VRegInterval)) + return true; + } + } + return false; +} + void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); 
MRI->setPhysRegUsed(PhysReg); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); - Matrix[*Units].unify(VirtReg); - } + + foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range); + Matrix[Unit].unify(VirtReg, Range); + return false; + }); + ++NumAssigned; DEBUG(dbgs() << '\n'); } @@ -90,10 +119,14 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) << " from " << PrintReg(PhysReg, TRI) << ':'); VRM->clearVirt(VirtReg.reg); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); - Matrix[*Units].extract(VirtReg); - } + + foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI)); + Matrix[Unit].extract(VirtReg, Range); + return false; + }); + ++NumUnassigned; DEBUG(dbgs() << '\n'); } @@ -121,12 +154,13 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - const LiveRange &UnitRange = LIS->getRegUnit(*Units); - if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes())) - return true; - } - return false; + + bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + const LiveRange &UnitRange = LIS->getRegUnit(Unit); + return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes()); + }); + return Result; } LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg, diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 5c5712f..e8bf687 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -291,6 +291,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Debug value, stackmap and patchpoint instructions can't be out of // range, so they don't need any updates. if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STATEPOINT || MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT) continue; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 3058b1a..3c73905 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/LeakDetector.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" @@ -45,7 +44,6 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) } MachineBasicBlock::~MachineBasicBlock() { - LeakDetector::removeGarbageObject(this); } /// getSymbol - Return the MCSymbol for this basic block. 
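The foreachUnit helper used throughout the hunks above threads an early exit through its callback: the callable returns true to abort the walk, and the wrapper reports whether that happened, which is how checkRegUnitInterference becomes an overlap query while assign and unassign simply return false from every callback. The same pattern in a self-contained sketch (not LLVM code):

    #include <cstdio>
    #include <vector>

    // Walk Units, letting the callback stop the iteration by returning true;
    // the return value says whether the walk was aborted early.
    template <typename Callable>
    static bool forEachUntil(const std::vector<unsigned> &Units,
                             Callable Func) {
      for (unsigned U : Units)
        if (Func(U))
          return true;
      return false;
    }

    int main() {
      std::vector<unsigned> Units = {1, 2, 3, 4};
      // Interference-style query: stop at the first conflicting unit.
      bool Conflict = forEachUntil(Units, [](unsigned U) { return U == 3; });
      std::printf("conflict: %d\n", Conflict); // conflict: 1
      return 0;
    }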
@@ -54,9 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const TargetMachine &TM = MF->getTarget(); - const char *Prefix = - TM.getSubtargetImpl()->getDataLayout()->getPrivateGlobalPrefix(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -87,14 +83,11 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { for (MachineBasicBlock::instr_iterator I = N->instr_begin(), E = N->instr_end(); I != E; ++I) I->AddRegOperandsToUseLists(RegInfo); - - LeakDetector::removeGarbageObject(N); } void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; - LeakDetector::addGarbageObject(N); } @@ -109,8 +102,6 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { // use/def lists. MachineFunction *MF = Parent->getParent(); N->AddRegOperandsToUseLists(MF->getRegInfo()); - - LeakDetector::removeGarbageObject(N); } /// removeNodeFromList (MI) - When we remove an instruction from a basic block @@ -124,8 +115,6 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(nullptr); - - LeakDetector::addGarbageObject(N); } /// transferNodesFromList (MI) - When moving a range of instructions from one diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 08fd200..1b5c1f1 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -347,65 +347,61 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( uint32_t WeightScale = 0; uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : BB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n"); continue; } - if (*SI != *SuccChain.begin()) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + if (Succ != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (CFG conflict)\n"); continue; } - // Make sure that a hot successor doesn't have a globally more important - // predecessor. - BlockFrequency CandidateEdgeFreq - = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + // Make sure that a hot successor doesn't have a globally more + // important predecessor. 
+ BlockFrequency CandidateEdgeFreq = + MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); bool BadCFGConflict = false; - for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), - PE = (*SI)->pred_end(); - PI != PE; ++PI) { - if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || - BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *Pred : Succ->predecessors()) { + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) continue; - BlockFrequency PredEdgeFreq - = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); if (PredEdgeFreq >= CandidateEdgeFreq) { BadCFGConflict = true; break; } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob) (non-cold CFG conflict)\n"); continue; } } - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); if (BestSucc && BestWeight >= SuccWeight) continue; - BestSucc = *SI; + BestSucc = Succ; BestWeight = SuccWeight; } return BestSucc; @@ -1043,12 +1039,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) + if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; - unsigned Align = TLI->getPrefLoopAlignment(); - if (!Align) - return; // Don't care about loop alignment. if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. @@ -1066,6 +1058,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; + unsigned Align = TLI->getPrefLoopAlignment(L); + if (!Align) + continue; // Don't care about loop alignment. + // If the block is cold relative to the function entry don't waste space // aligning it. BlockFrequency Freq = MBFI->getBlockFreq(*BI); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index ae26967..21b9c5a 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -451,6 +451,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; SmallVector<unsigned, 2> ImplicitDefsToUpdate; + SmallVector<unsigned, 2> ImplicitDefs; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -542,6 +543,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // we should make sure it is not dead at CSMI. if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) ImplicitDefsToUpdate.push_back(i); + + // Keep track of implicit defs of CSMI and MI, to clear possibly + // made-redundant kill flags. + if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg) + ImplicitDefs.push_back(OldReg); + if (OldReg == NewReg) { --NumDefs; continue; @@ -573,8 +580,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Actually perform the elimination. 
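A worked version of the "globally more important predecessor" test in the block-placement hunk above, with plain doubles standing in for LLVM's BranchProbability and BlockFrequency fixed-point types, so the numbers are illustrative only:

    #include <cstdio>
    #include <vector>

    struct Edge { double PredFreq, Prob; }; // Freq(Pred) and P(Pred->Succ)

    // Succ is rejected as a hot layout successor of BB when some other
    // predecessor edge reaches it at least as frequently as the discounted
    // candidate edge BB->Succ.
    static bool hasBadCFGConflict(double FreqBB, double ProbBBSucc,
                                  double HotProb,
                                  const std::vector<Edge> &OtherPreds) {
      double CandidateEdgeFreq = FreqBB * ProbBBSucc * (1.0 - HotProb);
      for (const Edge &E : OtherPreds)
        if (E.PredFreq * E.Prob >= CandidateEdgeFreq)
          return true;
      return false;
    }

    int main() {
      // BB (freq 100) branches to Succ with p=0.9 and HotProb=0.8, so the
      // candidate edge weighs 100*0.9*0.2 = 18; a predecessor contributing
      // 30*0.7 = 21 wins, and the merge is skipped.
      std::printf("%d\n", hasBadCFGConflict(100, 0.9, 0.8, {{30, 0.7}})); // 1
      return 0;
    }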
   if (DoCSE) {
     for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
-      MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
-      MRI->clearKillFlags(CSEPairs[i].second);
+      unsigned OldReg = CSEPairs[i].first;
+      unsigned NewReg = CSEPairs[i].second;
+      // OldReg may have been unused but is used now, clear the Dead flag.
+      MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
+      assert(Def != nullptr && "CSEd register has no unique definition?");
+      Def->clearRegisterDeads(NewReg);
+      // Replace with NewReg and clear kill flags which may be wrong now.
+      MRI->replaceRegWith(OldReg, NewReg);
+      MRI->clearKillFlags(NewReg);
     }
     // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
@@ -582,6 +596,29 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
       CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+      // Go through implicit defs of CSMI and MI, and clear the kill flags on
+      // their uses in all the instructions between CSMI and MI.
+      // We might have made some of the kill flags redundant, consider:
+      //   subs  ... %NZCV<imp-def>        <- CSMI
+      //   csinc ... %NZCV<imp-use,kill>   <- this kill flag isn't valid anymore
+      //   subs  ... %NZCV<imp-def>        <- MI, to be eliminated
+      //   csinc ... %NZCV<imp-use,kill>
+      // Since we eliminated MI, and reused a register imp-def'd by CSMI
+      // (here %NZCV), that register, if it was killed before MI, should have
+      // that kill flag removed, because its lifetime was extended.
+      if (CSMI->getParent() == MI->getParent()) {
+        for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+          for (auto ImplicitDef : ImplicitDefs)
+            if (MachineOperand *MO = II->findRegisterUseOperand(
+                    ImplicitDef, /*isKill=*/true, TRI))
+              MO->setIsKill(false);
+      } else {
+        // If the instructions aren't in the same BB, bail out and clear the
+        // kill flag on all uses of the imp-def'd register.
+        for (auto ImplicitDef : ImplicitDefs)
+          MRI->clearKillFlags(ImplicitDef);
+      }
+
     if (CrossMBBPhysDef) {
       // Add physical register defs now coming in from a predecessor to MBB
       // livein list.
@@ -606,6 +643,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     }
     CSEPairs.clear();
     ImplicitDefsToUpdate.clear();
+    ImplicitDefs.clear();
   }
   return Changed;
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 2931258..41045ac 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -45,7 +45,7 @@ class MachineCombiner : public MachineFunctionPass {
   TargetSchedModel TSchedModel;
-  /// OptSize - True if optimizing for code size.
+  /// True if optimizing for code size.
   bool OptSize;
 public:
@@ -109,7 +109,7 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
   return DefInstr;
 }
-/// getDepth - Computes depth of instructions in vector \InsInstr.
+/// Computes depth of instructions in vector \p InsInstrs.
 ///
 /// \param InsInstrs is a vector of machine instructions
 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -125,7 +125,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
   SmallVector<unsigned, 16> InstrDepth;
   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
-  // Foreach instruction in in the new sequence compute the depth based on the
+  // For each instruction in the new sequence compute the depth based on the
   // operands. Use the trace information when possible. For new operands which
   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
   for (auto *InstrPtr : InsInstrs) { // for each Use
@@ -169,8 +169,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
   return InstrDepth[NewRootIdx];
 }
-/// getLatency - Computes instruction latency as max of latency of defined
-/// operands
+/// Computes instruction latency as max of latency of defined operands.
 ///
 /// \param Root is a machine instruction that could be replaced by NewRoot.
 /// It is used to compute a more accurate latency information for NewRoot in
@@ -211,12 +210,12 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
   return NewRootLatency;
 }
-/// preservesCriticalPathlen - True when the new instruction sequence does not
+/// True when the new instruction sequence does not
 /// lengthen the critical path. The DAGCombine code sequence ends in MI
 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
 /// necessary condition for the new sequence to replace the old sequence is that
-/// is cannot lengthen the critical path. This is decided by the formula
-/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
+/// it cannot lengthen the critical path. This is decided by the formula
+/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack).
 /// The slack is the number of cycles Root can be delayed before the critical
 /// path becomes longer.
 bool MachineCombiner::preservesCriticalPathLen(
@@ -264,8 +263,7 @@ void MachineCombiner::instr2instrSC(
     InstrsSC.push_back(SC);
   }
 }
-/// preservesResourceLen - True when the new instructions do not increase
-/// resource length
+/// True when the new instructions do not increase resource length.
 bool MachineCombiner::preservesResourceLen(
     MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
     SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -300,7 +298,7 @@ bool MachineCombiner::preservesResourceLen(
 }
 /// \returns true when new instruction sequence should be generated
-/// independent if it lenghtens critical path or not
+/// independent of whether it lengthens the critical path or not
 bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
   if (OptSize && (NewSize < OldSize))
     return true;
@@ -309,7 +307,7 @@
   return false;
 }
-/// combineInstructions - substitute a slow code sequence with a faster one by
+/// Substitute a slow code sequence with a faster one by
 /// evaluating instruction combining pattern.
 /// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
 /// combining based on machine trace metrics.
Only combine a sequence of @@ -406,8 +404,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { } bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo &STI = - MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); TII = STI.getInstrInfo(); TRI = STI.getRegisterInfo(); SchedModel = STI.getSchedModel(); @@ -416,8 +413,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = 0; - OptSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp index 0bee846..acb7c48 100644 --- a/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/lib/CodeGen/MachineDominanceFrontier.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominanceFrontier.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 8a2b610..151a260 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -67,17 +67,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, STI->getFrameLowering()->isStackRealignable(), !F->hasFnAttribute("no-realign-stack")); - if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Fn->getAttributes(). - getStackAlignment(AttributeSet::FunctionIndex)); + if (Fn->hasFnAttribute(Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) + if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -462,7 +459,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getSubtarget().getDataLayout(); + const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); @@ -477,7 +474,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. 
MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getSubtarget().getDataLayout(); + const DataLayout *DL = getTarget().getDataLayout(); return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -587,13 +584,20 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, return -++NumFixedObjects; } +int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) { + // Force the use of a frame pointer. The intention is that this intrinsic be + // used in conjunction with unwind mechanisms that leak the frame pointer. + setFrameAddressIsTaken(true); + Size = RoundUpToAlignment(Size, StackAlignment); + return CreateStackObject(Size, StackAlignment, false); +} + BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { assert(MBB && "MBB must be valid"); const MachineFunction *MF = MBB->getParent(); assert(MF && "MBB must be part of a MachineFunction"); - const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); BitVector BV(TRI->getNumRegs()); // Before CSI is calculated, no registers are considered pristine. They can be @@ -813,7 +817,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getSubtargetImpl()->getDataLayout(); + return TM.getDataLayout(); } Type *MachineConstantPoolEntry::getType() const { @@ -835,13 +839,13 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { switch (getRelocationInfo()) { default: llvm_unreachable("Unknown section kind"); - case 2: + case Constant::GlobalRelocations: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: + case Constant::LocalRelocation: Kind = SectionKind::getReadOnlyWithRelLocal(); break; - case 0: + case Constant::NoRelocation: switch (DL->getTypeAllocSize(getType())) { case 4: Kind = SectionKind::getMergeableConst4(); @@ -853,7 +857,7 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { Kind = SectionKind::getMergeableConst16(); break; default: - Kind = SectionKind::getMergeableConst(); + Kind = SectionKind::getReadOnly(); break; } } diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 789f204..aaf06a7 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -11,11 +11,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" using namespace llvm; Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, @@ -43,15 +50,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { // because CodeGen overloads that to mean preserving the MachineBasicBlock // CFG in addition to the LLVM IR CFG. 
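The CreateFrameAllocation hunk above rounds the requested size up to the stack alignment; LLVM's RoundUpToAlignment helper (from Support/MathExtras.h) computes the smallest multiple of Align that is greater than or equal to Value. A standalone sketch of that arithmetic, assuming Align > 0:

    #include <cstdint>
    #include <cstdio>

    // Smallest multiple of Align that is greater than or equal to Value.
    static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      std::printf("%llu\n", (unsigned long long)roundUpToAlignment(13, 8)); // 16
      std::printf("%llu\n", (unsigned long long)roundUpToAlignment(16, 8)); // 16
      return 0;
    }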
AU.addPreserved<AliasAnalysis>(); - AU.addPreserved("scalar-evolution"); - AU.addPreserved("iv-users"); - AU.addPreserved("memdep"); - AU.addPreserved("live-values"); - AU.addPreserved("domtree"); - AU.addPreserved("domfrontier"); - AU.addPreserved("loops"); - AU.addPreserved("lda"); - AU.addPreserved("stack-protector"); + AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<IVUsers>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addPreserved<MemoryDependenceAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index dee3977..790f5ac 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -52,7 +52,7 @@ char MachineFunctionPrinterPass::ID = 0; } char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; -INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", +INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer", "Machine Function Printer", false, false) namespace llvm { diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 7ad0d94..981e4a3 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -397,7 +397,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { break; case MachineOperand::MO_Metadata: OS << '<'; - getMetadata()->printAsOperand(OS, /*PrintType=*/false); + getMetadata()->printAsOperand(OS); OS << '>'; break; case MachineOperand::MO_MCSymbol: @@ -537,7 +537,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) { OS << "(tbaa="; if (TBAAInfo->getNumOperands() > 0) - TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); + TBAAInfo->getOperand(0)->printAsOperand(OS); else OS << "<unknown>"; OS << ")"; @@ -548,7 +548,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << "(alias.scope="; if (ScopeInfo->getNumOperands() > 0) for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { - ScopeInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false); + ScopeInfo->getOperand(i)->printAsOperand(OS); if (i != ie-1) OS << ","; } @@ -562,7 +562,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << "(noalias="; if (NoAliasInfo->getNumOperands() > 0) for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { - NoAliasInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false); + NoAliasInfo->getOperand(i)->printAsOperand(OS); if (i != ie-1) OS << ","; } @@ -595,10 +595,12 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, - const DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) { + DebugLoc dl, bool NoImp) + : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), + AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), + debugLoc(std::move(dl)) { + assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + // Reserve space for the expected number of operands. 
if (unsigned NumOps = MCID->getNumOperands() + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { @@ -617,12 +619,14 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { + assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + CapOperands = OperandCapacity::get(MI.getNumOperands()); Operands = MF.allocateOperandArray(CapOperands); // Copy operands. - for (unsigned i = 0; i != MI.getNumOperands(); ++i) - addOperand(MF, MI.getOperand(i)); + for (const MachineOperand &MO : MI.operands()) + addOperand(MF, MO); // Copy all the sensible flags. setFlags(MI.Flags); @@ -641,18 +645,18 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Operands[i].isReg()) - MRI.removeRegOperandFromUseList(&Operands[i]); + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.removeRegOperandFromUseList(&MO); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Operands[i].isReg()) - MRI.addRegOperandToUseList(&Operands[i]); + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.addRegOperandToUseList(&MO); } void MachineInstr::addOperand(const MachineOperand &Op) { @@ -670,14 +674,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src, if (MRI) return MRI->moveOperands(Dst, Src, NumOps); - // Here it would be convenient to call memmove, so that isn't allowed because - // MachineOperand has a constructor and so isn't a POD type. - if (Dst < Src) - for (unsigned i = 0; i != NumOps; ++i) - new (Dst + i) MachineOperand(Src[i]); - else - for (unsigned i = NumOps; i ; --i) - new (Dst + i - 1) MachineOperand(Src[i - 1]); + // MachineOperand is a trivially copyable type so we can just use memmove. + std::memmove(Dst, Src, NumOps * sizeof(MachineOperand)); } /// addOperand - Add the specified operand to the instruction. If it is an @@ -922,8 +920,7 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { MachineInstr *MI = (MachineInstr *)this; MachineRegisterInfo &MRI = MF->getRegInfo(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); @@ -1326,8 +1323,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { /// clearKillInfo - Clears kill flags on all operands. 
/// void MachineInstr::clearKillInfo() { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (MO.isReg() && MO.isUse()) MO.setIsKill(false); } @@ -1340,15 +1336,13 @@ void MachineInstr::substituteRegister(unsigned FromReg, if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { if (SubIdx) ToReg = RegInfo.getSubReg(ToReg, SubIdx); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || MO.getReg() != FromReg) continue; MO.substPhysReg(ToReg, RegInfo); } } else { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || MO.getReg() != FromReg) continue; MO.substVirtReg(ToReg, SubIdx, RegInfo); @@ -1491,8 +1485,7 @@ bool MachineInstr::hasUnmodeledSideEffects() const { /// allDefsAreDead - Return true if all the defs of this instruction are dead. /// bool MachineInstr::allDefsAreDead() const { - for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (!MO.isReg() || MO.isUse()) continue; if (!MO.isDead()) @@ -1823,8 +1816,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) RegInfo = nullptr; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned OpReg = MO.getReg(); @@ -1885,6 +1877,22 @@ bool MachineInstr::addRegisterDead(unsigned Reg, return true; } +void MachineInstr::clearRegisterDeads(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + MO.setIsDead(false); + } +} + +void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) + continue; + MO.setIsUndef(); + } +} + void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -1892,8 +1900,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, if (MO) return; } else { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); + for (const MachineOperand &MO : operands()) { if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; @@ -1907,8 +1914,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg, void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, const TargetRegisterInfo &TRI) { bool HasRegMask = false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - MachineOperand &MO = getOperand(i); + for (MachineOperand &MO : operands()) { if (MO.isRegMask()) { HasRegMask = true; continue; @@ -1916,15 +1922,10 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - bool Dead = true; - for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); - I != E; ++I) - if (TRI.regsOverlap(*I, Reg)) { - Dead = false; - break; - } // If there are no uses, including partial uses, the def is dead. 
- if (Dead) MO.setIsDead(); + if (std::none_of(UsedRegs.begin(), UsedRegs.end(), + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + MO.setIsDead(); } // This is a call with a register mask operand. @@ -1941,8 +1942,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { SmallVector<size_t, 8> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. @@ -1960,7 +1960,8 @@ void MachineInstr::emitError(StringRef Msg) const { if (getOperand(i-1).isMetadata() && (LocMD = getOperand(i-1).getMetadata()) && LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { LocCookie = CI->getZExtValue(); break; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 2ab0467..64d0932 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -49,6 +49,11 @@ AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden); +static cl::opt<bool> +HoistCheapInsts("hoist-cheap-insts", + cl::desc("MachineLICM should hoist even cheap instructions"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -688,6 +693,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, /// one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; @@ -695,7 +704,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // Perform a DFS walk to determine the order of visit. WorkList.push_back(HeaderN); - do { + while (!WorkList.empty()) { MachineDomTreeNode *Node = WorkList.pop_back_val(); assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); @@ -729,28 +738,21 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { ParentMap[Child] = Node; WorkList.push_back(Child); } - } while (!WorkList.empty()); + } - if (Scopes.size() != 0) { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - return; + if (Scopes.size() == 0) + return; - // Compute registers which are livein into the loop headers. - RegSeen.clear(); - BackTrace.clear(); - InitRegPressure(Preheader); - } + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); // Now perform LICM. for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { MachineDomTreeNode *Node = Scopes[i]; MachineBasicBlock *MBB = Node->getBlock(); - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) - continue; - EnterScope(MBB); // Process the block @@ -1075,7 +1077,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, // Don't hoist cheap instructions if they would increase register pressure, // even if we're under the limit. 
- if (CheapInstr) + if (CheapInstr && !HoistCheapInsts) return true; for (unsigned i = BackTrace.size(); i != 0; --i) { diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index eb3c0bf..fca7df0 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -9,6 +9,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -273,9 +274,10 @@ bool MachineModuleInfo::doInitialization(Module &M) { CurCallSite = 0; CallsEHReturn = 0; CallsUnwindInit = 0; - DbgInfoAvailable = UsesVAFloatArgument = false; + DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; // Always emit some info, by default "no personality" info. Personalities.push_back(nullptr); + PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; TheModule = nullptr; @@ -452,6 +454,14 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } +MCSymbol * +MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) { + MCSymbol *ClauseLabel = Context.CreateTempSymbol(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.ClauseLabels.push_back(ClauseLabel); + return ClauseLabel; +} + /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { @@ -546,11 +556,17 @@ try_next:; /// getPersonality - Return the personality function for the current function. const Function *MachineModuleInfo::getPersonality() const { - // FIXME: Until PR1414 will be fixed, we're using 1 personality function per - // function - return !LandingPads.empty() ? LandingPads[0].Personality : nullptr; + for (const LandingPadInfo &LPI : LandingPads) + if (LPI.Personality) + return LPI.Personality; + return nullptr; } +EHPersonality MachineModuleInfo::getPersonalityType() { + if (PersonalityTypeCache == EHPersonality::Unknown) + PersonalityTypeCache = classifyEHPersonality(getPersonality()); + return PersonalityTypeCache; +} /// getPersonalityIndex - Return unique index for current personality /// function. NULL/first personality function should always get zero index. 
unsigned MachineModuleInfo::getPersonalityIndex() const { diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp index 5a5035e..01d2c2e 100644 --- a/lib/CodeGen/MachineRegionInfo.cpp +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -1,8 +1,8 @@ #include "llvm/CodeGen/MachineRegionInfo.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" +#include "llvm/CodeGen/MachinePostDominators.h" #define DEBUG_TYPE "region" diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index e9612f3..32b7db1 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -24,7 +24,8 @@ using namespace llvm; void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) - : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) { + : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true), + TracksSubRegLiveness(false) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); @@ -60,8 +61,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, } bool -MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { - const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); +MachineRegisterInfo::recomputeRegClass(unsigned Reg) { + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); @@ -128,6 +129,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; + continue; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); @@ -391,6 +393,14 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } +unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const +{ + // Lane masks are only defined for vregs. 
+  assert(TargetRegisterInfo::isVirtualRegister(Reg));
+  const TargetRegisterClass &TRC = *getRegClass(Reg);
+  return TRC.getLaneMask();
+}
+
 #ifndef NDEBUG
 void MachineRegisterInfo::dumpUses(unsigned Reg) const {
   for (MachineInstr &I : use_instructions(Reg))
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 261942f..89ac6a8 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -144,12 +144,12 @@ char MachineScheduler::ID = 0;
 char &llvm::MachineSchedulerID = MachineScheduler::ID;
-INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
                       "Machine Instruction Scheduler", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachineScheduler, "misched",
+INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
                     "Machine Instruction Scheduler", false, false)
 MachineScheduler::MachineScheduler()
@@ -336,9 +336,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   if (skipOptnoneFunction(*mf.getFunction()))
     return false;
-  const TargetSubtargetInfo &ST =
-    mf.getTarget().getSubtarget<TargetSubtargetInfo>();
-  if (!ST.enablePostMachineScheduler()) {
+  if (!mf.getSubtarget().enablePostMachineScheduler()) {
     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
     return false;
   }
@@ -430,9 +428,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
       // instruction stream until we find the nearest boundary.
       unsigned NumRegionInstrs = 0;
       MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
+      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
          break;
+        if (!I->isDebugValue())
+          ++NumRegionInstrs;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
@@ -1432,12 +1432,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
   // Check if either the dest or source is local. If it's live across a back
   // edge, it's not local. Note that if both vregs are live across the back
   // edge, we cannot successfully constrain the copy without cyclic scheduling.
-  unsigned LocalReg = DstReg;
-  unsigned GlobalReg = SrcReg;
+  // If both the copy's source and dest are local live intervals, then we
+  // should treat the dest as the global for the purpose of adding
+  // constraints. This adds edges from source's other uses to the copy.
+  unsigned LocalReg = SrcReg;
+  unsigned GlobalReg = DstReg;
   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
-    LocalReg = SrcReg;
-    GlobalReg = DstReg;
+    LocalReg = DstReg;
+    GlobalReg = SrcReg;
     LocalLI = &LIS->getInterval(LocalReg);
     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
       return;
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index ba25bca..8337793 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -112,7 +112,7 @@ namespace {
     /// for the lifetime of an iteration.
     ///
     /// \return True if the edge is marked as toSplit, false otherwise.
-    /// False can be retruned if, for instance, this is not profitable.
+    /// False can be returned if, for instance, this is not profitable.
bool PostponeSplitCriticalEdge(MachineInstr *MI, MachineBasicBlock *From, MachineBasicBlock *To, @@ -504,7 +504,7 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, // If SuccToSinkTo post-dominates then it may also be profitable if MI // can be profitably sunk further into another block in a later round. bool BreakPHIEdge = false; - // FIXME - If finding successor is compile time expensive then catch results. + // FIXME - If finding successor is compile time expensive then cache results. if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); @@ -553,19 +553,6 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return nullptr; - // FIXME: This picks a successor to sink into based on having one - // successor that dominates all the uses. However, there are cases where - // sinking can happen but where the sink point isn't a successor. For - // example: - // - // x = computation - // if () {} else {} - // use x - // - // the instruction could be sunk over the whole diamond for the - // if/then/else (or loop, etc), allowing it to be sunk into other blocks - // after that. - // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { @@ -585,6 +572,23 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // higher priority, otherwise prioritize smaller loop depths. SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + + // Handle cases where sinking can happen but where the sink point isn't a + // successor. For example: + // + // x = computation + // if () {} else {} + // use x + // + const std::vector<MachineDomTreeNode *> &Children = + DT->getNode(MBB)->getChildren(); + for (const auto &DTChild : Children) + // DomTree children of MBB that have MBB as immediate dominator are added. + if (DTChild->getIDom()->getBlock() == MI->getParent() && + // Skip MBBs already added to the Succs vector above. + !MBB->isSuccessor(DTChild->getBlock())) + Succs.push_back(DTChild->getBlock()); + // Sort Successors according to their loop depth or block frequency info.
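// Editorial note, not part of the patch: std::stable_sort (rather than
// std::sort) keeps successors that compare equal in their original CFG order,
// so the chosen sink block stays deterministic across hosts. A sketch of the
// call that begins on the next line, with a hypothetical depth-based
// comparator standing in for the real one:
//   std::stable_sort(Succs.begin(), Succs.end(),
//                    [&](MachineBasicBlock *L, MachineBasicBlock *R) {
//                      return LI->getLoopDepth(L) < LI->getLoopDepth(R);
//                    });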
std::stable_sort( Succs.begin(), Succs.end(), diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 2cf87eb..8aacd1f 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -52,12 +52,11 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis<MachineLoopInfo>(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget<TargetSubtargetInfo>(); SchedModel.init(ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); ProcResourceCycles.resize(MF->getNumBlockIDs() * diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 99f0583..bdb094f 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -54,16 +55,13 @@ namespace { MachineVerifier(Pass *pass, const char *b) : PASS(pass), - Banner(b), - OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) + Banner(b) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; const char *Banner; - const char *const OutFileName; - raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; const TargetInstrInfo *TII; @@ -215,9 +213,9 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); void report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg); + const LiveRange &LR, unsigned Reg, unsigned LaneMask); void report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg); + const LiveRange &LR, unsigned Reg, unsigned LaneMask); void verifyInlineAsm(const MachineInstr *MI); @@ -230,20 +228,22 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, + unsigned); void verifyLiveRangeSegment(const LiveRange&, - const LiveRange::const_iterator I, unsigned); - void verifyLiveRange(const LiveRange&, unsigned); + const LiveRange::const_iterator I, unsigned, + unsigned); + void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0); void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid - const char *const Banner; + const std::string Banner; - MachineVerifierPass(const char *b = nullptr) - : MachineFunctionPass(ID), Banner(b) { + MachineVerifierPass(const std::string &banner = std::string()) + : MachineFunctionPass(ID), Banner(banner) { initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); } @@ -253,7 +253,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) override { - MF.verify(this, Banner); + MF.verify(this, Banner.c_str()); return false; } }; @@ -264,7 +264,7 @@ char MachineVerifierPass::ID = 0; INITIALIZE_PASS(MachineVerifierPass, "machineverifier", "Verify
generated machine code", false, false) -FunctionPass *llvm::createMachineVerifierPass(const char *Banner) { +FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) { return new MachineVerifierPass(Banner); } @@ -274,22 +274,6 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { - raw_ostream *OutFile = nullptr; - if (OutFileName) { - std::error_code EC; - OutFile = new raw_fd_ostream(OutFileName, EC, - sys::fs::F_Append | sys::fs::F_Text); - if (EC) { - errs() << "Error opening '" << OutFileName << "': " << EC.message() - << '\n'; - exit(1); - } - - OS = OutFile; - } else { - OS = &errs(); - } - foundErrors = 0; this->MF = &MF; @@ -324,7 +308,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); - *OS << "Instruction: " << *MBBI; + errs() << "Instruction: " << *MBBI; continue; } @@ -360,9 +344,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { } visitMachineFunctionAfter(); - if (OutFile) - delete OutFile; - else if (foundErrors) + if (foundErrors) report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); // Clean up. @@ -379,70 +361,76 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - *OS << '\n'; + errs() << '\n'; if (!foundErrors++) { if (Banner) - *OS << "# " << Banner << '\n'; - MF->print(*OS, Indexes); + errs() << "# " << Banner << '\n'; + MF->print(errs(), Indexes); } - *OS << "*** Bad machine code: " << msg << " ***\n" + errs() << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: BB#" << MBB->getNumber() + errs() << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() << " (" << (const void*)MBB << ')'; if (Indexes) - *OS << " [" << Indexes->getMBBStartIdx(MBB) + errs() << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; - *OS << '\n'; + errs() << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); - *OS << "- instruction: "; + errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(MI)) - *OS << Indexes->getInstructionIndex(MI) << '\t'; - MI->print(*OS, TM); + errs() << Indexes->getInstructionIndex(MI) << '\t'; + MI->print(errs(), TM); } void MachineVerifier::report(const char *msg, const MachineOperand *MO, unsigned MONum) { assert(MO); report(msg, MO->getParent()); - *OS << "- operand " << MONum << ": "; - MO->print(*OS, TM); - *OS << "\n"; + errs() << "- operand " << MONum << ": "; + MO->print(errs(), TM); + errs() << "\n"; } void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: " << LI << '\n'; + errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: " << LI << '\n'; + errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg) { + const LiveRange &LR, unsigned Reg, + unsigned LaneMask) { 
report(msg, MBB); - *OS << "- liverange: " << LR << '\n'; - *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + errs() << "- liverange: " << LR << '\n'; + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; + if (LaneMask != 0) + errs() << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg) { + const LiveRange &LR, unsigned Reg, + unsigned LaneMask) { report(msg, MF); - *OS << "- liverange: " << LR << '\n'; - *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + errs() << "- liverange: " << LR << '\n'; + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; + if (LaneMask != 0) + errs() << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -530,7 +518,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - *OS << "MBB is not in the predecessor list of the successor BB#" + errs() << "MBB is not in the predecessor list of the successor BB#" << (*I)->getNumber() << ".\n"; } } @@ -542,7 +530,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Succs.count(MBB)) { report("Inconsistent CFG", MBB); - *OS << "MBB is not in the successor list of the predecessor BB#" + errs() << "MBB is not in the successor list of the predecessor BB#" << (*I)->getNumber() << ".\n"; } } @@ -592,7 +580,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && Cond.empty()) { // Block unconditionally branches somewhere. - if (MBB->succ_size() != 1+LandingPadSuccs.size()) { + // If the block has exactly one successor, which happens to be a + // landing pad, accept it as valid control flow.
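// Editorial sketch, not part of the patch: the shape being accepted here is
// a block that unconditionally branches to its lone successor, which is also
// recorded as a landing pad, i.e.
//   succ_size() == 1 && LandingPadSuccs == { *succ_begin() }
// Under the old condition (succ_size() != 1 + LandingPadSuccs.size()) this
// legitimate layout tripped the "exactly one CFG successor" report below.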
+ if (MBB->succ_size() != 1+LandingPadSuccs.size() && + (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 || + *MBB->succ_begin() != *LandingPadSuccs.begin())) { report("MBB exits via unconditional branch but doesn't have " "exactly one CFG successor!", MBB); } else if (!MBB->isSuccessor(TBB)) { @@ -713,7 +705,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { SlotIndex idx = Indexes->getInstructionIndex(MI); if (!(idx > lastIndex)) { report("Instruction index out of order", MI); - *OS << "Last instruction was at " << lastIndex << '\n'; + errs() << "Last instruction was at " << lastIndex << '\n'; } lastIndex = idx; } @@ -726,7 +718,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { FirstTerminator = MI; } else if (FirstTerminator) { report("Non-terminator instruction after the first terminator", MI); - *OS << "First terminator was:\t" << *FirstTerminator; + errs() << "First terminator was:\t" << *FirstTerminator; } } @@ -778,7 +770,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - *OS << MCID.getNumOperands() << " operands expected, but " + errs() << MCID.getNumOperands() << " operands expected, but " << MI->getNumOperands() << " given.\n"; } @@ -908,7 +900,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - *OS << TRI->getName(Reg) << " is not a " + errs() << TRI->getName(Reg) << " is not a " << TRI->getRegClassName(DRC) << " register.\n"; } } @@ -920,13 +912,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); - *OS << "Register class " << TRI->getRegClassName(RC) + errs() << "Register class " << TRI->getRegClassName(RC) << " does not support subreg index " << SubIdx << "\n"; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - *OS << "Register class " << TRI->getRegClassName(RC) + errs() << "Register class " << TRI->getRegClassName(RC) << " does not fully support subreg index " << SubIdx << "\n"; return; } @@ -948,7 +940,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - *OS << "Expected a " << TRI->getRegClassName(DRC) + errs() << "Expected a " << TRI->getRegClassName(DRC) << " register, but got a " << TRI->getRegClassName(RC) << " register\n"; } @@ -974,11 +966,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { SlotIndex Idx = LiveInts->getInstructionIndex(MI); if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); - *OS << "Live stack: " << LI << '\n'; + errs() << "Live stack: " << LI << '\n'; } if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); - *OS << "Live stack: " << LI << '\n'; + errs() << "Live stack: " << LI << '\n'; } } break; @@ -1017,12 +1009,12 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveQueryResult LRQ = LR->Query(UseIdx); if (!LRQ.valueIn()) { report("No live segment 
at use", MO, MONum); - *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; + errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } } } @@ -1035,13 +1027,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { LiveQueryResult LRQ = LI.Query(UseIdx); if (!LRQ.valueIn()) { report("No live segment at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + errs() << UseIdx << " is not live in " << LI << '\n'; } // Check for extra kill flags. // Note that we allow missing kill flags for now. if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + errs() << "Live range: " << LI << '\n'; } } else { report("Virtual register has no live interval", MO, MONum); @@ -1053,7 +1045,37 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!regsLive.count(Reg)) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) + bool Bad = !isReserved(Reg); + // We are fine if just any subregister has a defined value. + if (Bad) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); + ++SubRegs) { + if (regsLive.count(*SubRegs)) { + Bad = false; + break; + } + } + } + // If there is an additional implicit-use of a super register we stop + // here. By definition we are fine if the super register is not + // (completely) dead, if the complete super register is dead we will + // get a report for its operand. + if (Bad) { + for (const MachineOperand &MOP : MI->uses()) { + if (!MOP.isReg()) + continue; + if (!MOP.isImplicit()) + continue; + for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); + ++SubRegs) { + if (*SubRegs == Reg) { + Bad = false; + break; + } + } + } + } + if (Bad) report("Using an undefined physical register", MO, MONum); } else if (MRI->def_empty(Reg)) { report("Reading virtual register without a def", MO, MONum); @@ -1094,19 +1116,19 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { assert(VNI && "NULL valno is not allowed"); if (VNI->def != DefIdx) { report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " + errs() << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; } } else { report("No live segment at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; + errs() << DefIdx << " is not live in " << LI << '\n'; } // Check that, if the dead def flag is present, LiveInts agree. 
if (MO->isDead()) { LiveQueryResult LRQ = LI.Query(DefIdx); if (!LRQ.isDeadDef()) { report("Live range continues after dead def flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + errs() << "Live range: " << LI << '\n'; } } } else { @@ -1148,7 +1170,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { SlotIndex stop = Indexes->getMBBEndIdx(MBB); if (!(stop > lastIndex)) { report("Block ends before last instruction index", MBB); - *OS << "Block ends at " << stop + errs() << "Block ends at " << stop << " last instruction was at " << lastIndex << '\n'; } lastIndex = stop; @@ -1250,7 +1272,7 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { PrE = MBB->pred_end(); PrI != PrE; ++PrI) { if (!seen.count(*PrI)) { report("Missing PHI operand", &BBI); - *OS << "BB#" << (*PrI)->getNumber() + errs() << "BB#" << (*PrI)->getNumber() << " is a predecessor according to the CFG.\n"; } } @@ -1281,7 +1303,7 @@ void MachineVerifier::visitMachineFunctionAfter() { ++I) if (MInfo.regsKilled.count(*I)) { report("Virtual register killed in block, but needed live out.", &MBB); - *OS << "Virtual register " << PrintReg(*I) + errs() << "Virtual register " << PrintReg(*I) << " is used after the block.\n"; } } @@ -1313,13 +1335,13 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); - *OS << "Virtual register " << PrintReg(Reg) + errs() << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); - *OS << "Virtual register " << PrintReg(Reg) + errs() << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } } @@ -1338,7 +1360,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!LiveInts->hasInterval(Reg)) { report("Missing live interval for virtual register", MF); - *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; + errs() << PrintReg(Reg, TRI) << " still has defs or uses\n"; continue; } @@ -1354,38 +1376,40 @@ void MachineVerifier::verifyLiveIntervals() { } void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, - const VNInfo *VNI, - unsigned Reg) { + const VNInfo *VNI, unsigned Reg, + unsigned LaneMask) { if (VNI->isUnused()) return; const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LR, Reg); - *OS << "Valno #" << VNI->id << '\n'; + report("Valno not live at def and not marked unused", MF, LR, Reg, + LaneMask); + errs() << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { - report("Live segment at def has different valno", MF, LR, Reg); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + report("Live segment at def has different valno", MF, LR, Reg, LaneMask); + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LR, Reg); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + report("Invalid definition index", MF, LR, Reg, LaneMask); + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LR << '\n'; return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not 
defined at MBB start", MBB, LR, Reg); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + report("PHIDef value is not defined at MBB start", MBB, LR, Reg, + LaneMask); + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } return; @@ -1394,8 +1418,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LR, Reg); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("No instruction at def index", MBB, LR, Reg, LaneMask); + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } @@ -1413,6 +1437,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } + if (LaneMask != 0 && + (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0) + continue; hasDef = true; if (MOI->isEarlyClobber()) isEarlyClobber = true; @@ -1420,7 +1447,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!hasDef) { report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LR << '\n'; + errs() << "Valno #" << VNI->id << " in " << LR << '\n'; } // Early clobber defs begin at USE slots, but other defs must begin at @@ -1428,51 +1455,52 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (isEarlyClobber) { if (!VNI->def.isEarlyClobber()) { report("Early clobber def must be at an early-clobber slot", MBB, LR, - Reg); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + Reg, LaneMask); + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } else if (!VNI->def.isRegister()) { report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LR, Reg); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + MBB, LR, Reg, LaneMask); + errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } } void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg) { + unsigned Reg, unsigned LaneMask) { const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { - report("Foreign valno in live segment", MF, LR, Reg); - *OS << S << " has a bad valno\n"; + report("Foreign valno in live segment", MF, LR, Reg, LaneMask); + errs() << S << " has a bad valno\n"; } if (VNI->isUnused()) { - report("Live segment valno is marked unused", MF, LR, Reg); - *OS << S << '\n'; + report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); + errs() << S << '\n'; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LR, Reg); - *OS << S << '\n'; + report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); + errs() << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg); - *OS << S << '\n'; + report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, + LaneMask); + errs() << S << '\n'; } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { 
- report("Bad end of live segment, no basic block", MF, LR, Reg); - *OS << S << '\n'; + report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); + errs() << S << '\n'; return; } @@ -1489,15 +1517,17 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg); - *OS << S << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, + LaneMask); + errs() << S << '\n'; return; } // The block slot must refer to a basic block boundary. if (S.end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg); - *OS << S << '\n'; + report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, + LaneMask); + errs() << S << '\n'; } if (S.end.isDead()) { @@ -1505,8 +1535,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // That means there must be a dead def. if (!SlotIndex::isSameInstr(S.start, S.end)) { report("Live segment ending at dead slot spans instructions", EndMBB, LR, - Reg); - *OS << S << '\n'; + Reg, LaneMask); + errs() << S << '\n'; } } @@ -1515,8 +1545,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isEarlyClobber()) { if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LR, Reg); - *OS << S << '\n'; + "redefined by an EC def in the same instruction", EndMBB, LR, Reg, + LaneMask); + errs() << S << '\n'; } } @@ -1526,16 +1557,27 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; + bool hasSubRegDef = false; for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || MOI->getReg() != Reg) continue; + if (LaneMask != 0 && + (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0) + continue; + if (MOI->isDef() && MOI->getSubReg() != 0) + hasSubRegDef = true; if (MOI->readsReg()) hasRead = true; } if (!S.end.isDead()) { if (!hasRead) { - report("Instruction ending live segment doesn't read the register", MI); - *OS << S << " in " << LR << '\n'; + // When tracking subregister liveness, the main range must start new + // values on partial register writes, even if there is no read. + if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) { + report("Instruction ending live segment doesn't read the register", + MI); + errs() << S << " in " << LR << '\n'; + } } } } @@ -1573,8 +1615,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LR, Reg); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + report("Register not marked live out of predecessor", *PI, LR, Reg, + LaneMask); + errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; continue; @@ -1582,8 +1625,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Only PHI-defs can take different predecessor values. 
if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LR, Reg); - *OS << "Valno #" << PVNI->id << " live out of BB#" + report("Different value live out of predecessor", *PI, LR, Reg, + LaneMask); + errs() << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; @@ -1595,18 +1639,36 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } } -void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) { - for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end(); - I != E; ++I) - verifyLiveRangeValue(LR, *I, Reg); +void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, + unsigned LaneMask) { + for (const VNInfo *VNI : LR.valnos) + verifyLiveRangeValue(LR, VNI, Reg, LaneMask); for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I) - verifyLiveRangeSegment(LR, I, Reg); + verifyLiveRangeSegment(LR, I, Reg, LaneMask); } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { verifyLiveRange(LI, LI.reg); + unsigned Reg = LI.reg; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + unsigned Mask = 0; + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) + report("Lane masks of sub ranges overlap in live interval", MF, LI); + if ((SR.LaneMask & ~MaxMask) != 0) + report("Subrange lanemask is invalid", MF, LI); + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) + report("A Subrange is not covered by the main range", MF, LI); + } + } else if (LI.hasSubRanges()) { + report("subregister liveness only allowed for virtual registers", MF, LI); + } + // Check the LI only has one connected component. 
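// Editorial note, not part of the patch: "connected" here is over the
// def/use structure of the interval's values. If, say, valnos {#0,#1} and
// valno {#2} can never reach one another, the interval should have been
// separated into distinct vregs via ConnectedVNInfoEqClasses::Classify();
// getEqClass() is what the diagnostic dump below uses to group the valnos.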
if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { ConnectedVNInfoEqClasses ConEQ(*LiveInts); @@ -1614,12 +1676,12 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { if (NumComp > 1) { report("Multiple connected components in live interval", MF, LI); for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; + errs() << comp << ": valnos"; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I!=E; ++I) if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; + errs() << ' ' << (*I)->id; + errs() << '\n'; } } } @@ -1700,7 +1762,7 @@ void MachineVerifier::verifyStackFrame() { BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { report("FrameDestroy <n> is after FrameSetup <m>", &I); - *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" << AbsSPAdj << ">.\n"; } BBState.ExitValue += Size; @@ -1717,7 +1779,7 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" << SPState[(*I)->getNumber()].ExitValue << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while BB#" << MBB->getNumber() << " has entry state (" @@ -1733,7 +1795,7 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + errs() << "Successor BB#" << (*I)->getNumber() << " has entry state (" << SPState[(*I)->getNumber()].EntryValue << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while BB#" << MBB->getNumber() << " has exit state (" diff --git a/lib/CodeGen/OcamlGC.cpp b/lib/CodeGen/OcamlGC.cpp index 48db200..17654a6 100644 --- a/lib/CodeGen/OcamlGC.cpp +++ b/lib/CodeGen/OcamlGC.cpp @@ -20,16 +20,15 @@ using namespace llvm; namespace { - class OcamlGC : public GCStrategy { - public: - OcamlGC(); - }; +class OcamlGC : public GCStrategy { +public: + OcamlGC(); +}; } -static GCRegistry::Add<OcamlGC> -X("ocaml", "ocaml 3.10-compatible GC"); +static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC"); -void llvm::linkOcamlGC() { } +void llvm::linkOcamlGC() {} OcamlGC::OcamlGC() { NeededSafePoints = 1 << GC::PostCall; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index ec71d86..272d068 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -14,13 +14,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/Passes.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -82,7 +81,9 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify 
generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr)); + cl::init(false), + cl::ZeroOrMore); + static cl::opt<std::string> PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), @@ -235,8 +236,8 @@ TargetPassConfig::~TargetPassConfig() { // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), + Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), + Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true) { Impl = new PassConfigImpl(); @@ -250,7 +251,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&PostRAMachineLICMID, &MachineLICMID); // Temporarily disable experimental passes. - const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = *TM->getSubtargetImpl(); if (!ST.useMachineScheduler()) disablePass(&MachineSchedulerID); } @@ -304,7 +305,7 @@ IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { /// a later pass or that it should stop after an earlier pass, then do not add /// the pass. Finally, compare the current pass against the StartAfter /// and StopAfter options and change the Started/Stopped flags accordingly. -void TargetPassConfig::addPass(Pass *P) { +void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { assert(!Initialized && "PassConfig is immutable"); // Cache the Pass ID here in case the pass manager finds this pass is @@ -313,10 +314,21 @@ void TargetPassConfig::addPass(Pass *P) { // and shouldn't reference it. AnalysisID PassID = P->getPassID(); - if (Started && !Stopped) + if (Started && !Stopped) { + std::string Banner; + // Construct banner message before PM->add() as that may delete the pass. + if (AddingMachinePasses && (printAfter || verifyAfter)) + Banner = std::string("After ") + std::string(P->getPassName()); PM->add(P); - else + if (AddingMachinePasses) { + if (printAfter) + addPrintPass(Banner); + if (verifyAfter) + addVerifyPass(Banner); + } + } else { delete P; + } if (StopAfter == PassID) Stopped = true; if (StartAfter == PassID) @@ -330,7 +342,8 @@ void TargetPassConfig::addPass(Pass *P) { /// /// addPass cannot return a pointer to the pass instance because is internal the /// PassManager and the instance we create here may already be freed. -AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { +AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter, + bool printAfter) { IdentifyingPassPtr TargetID = getPassSubstitution(PassID); IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); if (!FinalPtr.isValid()) @@ -345,7 +358,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { llvm_unreachable("Pass ID not registered"); } AnalysisID FinalID = P->getPassID(); - addPass(P); // Ends the lifetime of P. + addPass(P, verifyAfter, printAfter); // Ends the lifetime of P. // Add the passes after the pass P if there is any. 
for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator @@ -360,18 +373,25 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { NP = Pass::createPass((*I).second.getID()); assert(NP && "Pass ID not registered"); } - addPass(NP); + addPass(NP, false, false); } } return FinalID; } -void TargetPassConfig::printAndVerify(const char *Banner) { +void TargetPassConfig::printAndVerify(const std::string &Banner) { + addPrintPass(Banner); + addVerifyPass(Banner); +} + +void TargetPassConfig::addPrintPass(const std::string &Banner) { if (TM->shouldPrintMachineCode()) - addPass(createMachineFunctionPrinterPass(dbgs(), Banner)); + PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} +void TargetPassConfig::addVerifyPass(const std::string &Banner) { if (VerifyMachineCode) - addPass(createMachineVerifierPass(Banner)); + PM->add(createMachineVerifierPass(Banner)); } /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -401,7 +421,10 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } + // Run GC lowering passes for builtin collectors + // TODO: add a pass insertion point here addPass(createGCLoweringPass()); + addPass(createShadowStackGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -429,9 +452,11 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::ItaniumWinEH: addPass(createDwarfEHPass(TM)); break; + case ExceptionHandling::WinEH: + addPass(createWinEHPass(TM)); + break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -491,6 +516,8 @@ void TargetPassConfig::addISelPrepare() { /// TODO: We could use a single addPre/Post(ID) hook to allow pass injection /// before/after any target-independent pass. But it's currently overkill. void TargetPassConfig::addMachinePasses() { + AddingMachinePasses = true; + // Insert a machine instr printer pass after the specified pass. // If -print-machineinstrs specified, print machineinstrs after all passes. if (StringRef(PrintMachineInstrs.getValue()).equals("")) @@ -499,7 +526,7 @@ void TargetPassConfig::addMachinePasses() { .equals("option-unspecified")) { const PassRegistry *PR = PassRegistry::getPassRegistry(); const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); - const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); + const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); assert (TPI && IPI && "Pass ID not registered!"); const char *TID = (const char *)(TPI->getTypeInfo()); const char *IID = (const char *)(IPI->getTypeInfo()); @@ -510,8 +537,7 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After Instruction Selection"); // Expand pseudo-instructions emitted by ISel. - if (addPass(&ExpandISelPseudosID)) - printAndVerify("After ExpandISelPseudos"); + addPass(&ExpandISelPseudosID); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -519,12 +545,11 @@ void TargetPassConfig::addMachinePasses() { } else { // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(&LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID, false); } // Run pre-ra passes. 
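// Editorial note, not part of the patch: hooks like addPreRegAlloc() used to
// return a bool so the caller could printAndVerify("After PreRegAlloc
// passes"); with printing/verification now folded into addPass() itself, the
// hook's return value no longer needs to be checked at this call site.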
- if (addPreRegAlloc()) - printAndVerify("After PreRegAlloc passes"); + addPreRegAlloc(); // Run register allocation and passes that are tightly coupled with it, // including phi elimination and scheduling. @@ -534,12 +559,10 @@ void TargetPassConfig::addMachinePasses() { addFastRegAlloc(createRegAllocPass(false)); // Run post-ra passes. - if (addPostRegAlloc()) - printAndVerify("After PostRegAlloc passes"); + addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... addPass(&PrologEpilogCodeInserterID); - printAndVerify("After PrologEpilogCodeInserter"); /// Add passes that optimize machine instructions after register allocation. if (getOptLevel() != CodeGenOpt::None) @@ -547,11 +570,9 @@ void TargetPassConfig::addMachinePasses() { // Expand pseudo instructions before second scheduling pass. addPass(&ExpandPostRAPseudosID); - printAndVerify("After ExpandPostRAPseudos"); // Run pre-sched2 passes. - if (addPreSched2()) - printAndVerify("After PreSched2 passes"); + addPreSched2(); // Second pass scheduler. if (getOptLevel() != CodeGenOpt::None) { @@ -559,66 +580,61 @@ void TargetPassConfig::addMachinePasses() { addPass(&PostMachineSchedulerID); else addPass(&PostRASchedulerID); - printAndVerify("After PostRAScheduler"); } // GC if (addGCPasses()) { if (PrintGCInfo) - addPass(createGCInfoPrinter(dbgs())); + addPass(createGCInfoPrinter(dbgs()), false, false); } // Basic block placement. if (getOptLevel() != CodeGenOpt::None) addBlockPlacement(); - if (addPreEmitPass()) - printAndVerify("After PreEmit passes"); + addPreEmitPass(); - addPass(&StackMapLivenessID); + addPass(&StackMapLivenessID, false); + + AddingMachinePasses = false; } /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - if (addPass(&EarlyTailDuplicateID)) - printAndVerify("After Pre-RegAlloc TailDuplicate"); + addPass(&EarlyTailDuplicateID); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - addPass(&OptimizePHIsID); + addPass(&OptimizePHIsID, false); // This pass merges large allocas. StackSlotColoring is a different pass // which merges spill slots. - addPass(&StackColoringID); + addPass(&StackColoringID, false); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(&LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID, false); // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). addPass(&DeadMachineInstructionElimID); - printAndVerify("After codegen DCE pass"); // Allow targets to insert passes that improve instruction level parallelism, // like if-conversion. Such passes will typically need dominator trees and // loop info, just like LICM and CSE below. - if (addILPOpts()) - printAndVerify("After ILP optimizations"); + addILPOpts(); - addPass(&MachineLICMID); - addPass(&MachineCSEID); + addPass(&MachineLICMID, false); + addPass(&MachineCSEID, false); addPass(&MachineSinkingID); - printAndVerify("After Machine LICM, CSE and Sinking passes"); - addPass(&PeepholeOptimizerID); + addPass(&PeepholeOptimizerID, false); // Clean-up the dead code that may have been generated by peephole // rewriting. 
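// Editorial example, not part of the patch: peephole rewriting can strand an
// instruction without users, e.g. (hypothetically) after a compare is folded
// into a flag-setting ALU op, the original CMP has no remaining reads; this
// second DCE run removes such leftovers.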
addPass(&DeadMachineInstructionElimID); - printAndVerify("After codegen peephole optimization pass"); } //===---------------------------------------------------------------------===// @@ -701,18 +717,17 @@ bool TargetPassConfig::usingDefaultRegAlloc() const { /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { - addPass(&PHIEliminationID); - addPass(&TwoAddressInstructionPassID); + addPass(&PHIEliminationID, false); + addPass(&TwoAddressInstructionPassID, false); addPass(RegAllocPass); - printAndVerify("After Register Allocation"); } /// Add standard target-independent passes that are tightly coupled with /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { - addPass(&ProcessImplicitDefsID); + addPass(&ProcessImplicitDefsID, false); // LiveVariables currently requires pure SSA form. // @@ -720,35 +735,30 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // LiveVariables can be removed completely, and LiveIntervals can be directly // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). - addPass(&LiveVariablesID); + addPass(&LiveVariablesID, false); // Edge splitting is smarter with machine loop info. - addPass(&MachineLoopInfoID); - addPass(&PHIEliminationID); + addPass(&MachineLoopInfoID, false); + addPass(&PHIEliminationID, false); // Eventually, we want to run LiveIntervals before PHI elimination. if (EarlyLiveIntervals) - addPass(&LiveIntervalsID); + addPass(&LiveIntervalsID, false); - addPass(&TwoAddressInstructionPassID); + addPass(&TwoAddressInstructionPassID, false); addPass(&RegisterCoalescerID); - printAndVerify("After Register Coalescing"); // PreRA instruction scheduling. - if (addPass(&MachineSchedulerID)) - printAndVerify("After Machine Scheduling"); + addPass(&MachineSchedulerID); // Add the selected register allocation pass. addPass(RegAllocPass); - printAndVerify("After Register Allocation, before rewriter"); // Allow targets to change the register assignments before rewriting. - if (addPreRewrite()) - printAndVerify("After pre-rewrite passes"); + addPreRewrite(); // Finally rewrite virtual registers. addPass(&VirtRegRewriterID); - printAndVerify("After Virtual Register Rewriter"); // Perform stack slot coloring and post-ra machine LICM. // @@ -760,8 +770,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // // FIXME: can this move into MachineLateOptimization? addPass(&PostRAMachineLICMID); - - printAndVerify("After StackSlotColoring and postra Machine LICM"); } //===---------------------------------------------------------------------===// @@ -771,34 +779,30 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { /// Add passes that optimize machine instructions after register allocation. void TargetPassConfig::addMachineLateOptimization() { // Branch folding must be run after regalloc and prolog/epilog insertion. - if (addPass(&BranchFolderPassID)) - printAndVerify("After BranchFolding"); + addPass(&BranchFolderPassID); // Tail duplication. // Note that duplicating tail just increases code size and degrades // performance for targets that require Structured Control Flow. // In addition it can also make CFG irreducible. 
Thus we disable it. - if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID)) - printAndVerify("After TailDuplicate"); + if (!TM->requiresStructuredCFG()) + addPass(&TailDuplicateID); // Copy propagation. - if (addPass(&MachineCopyPropagationID)) - printAndVerify("After copy propagation pass"); + addPass(&MachineCopyPropagationID); } /// Add standard GC passes. bool TargetPassConfig::addGCPasses() { - addPass(&GCMachineCodeAnalysisID); + addPass(&GCMachineCodeAnalysisID, false); return true; } /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - if (addPass(&MachineBlockPlacementID)) { + if (addPass(&MachineBlockPlacementID, false)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); - - printAndVerify("After machine block placement."); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index a296aea..283d1f2 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -133,7 +133,8 @@ namespace { bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSetImpl<MachineInstr*> &LocalMIs); - bool optimizeSelect(MachineInstr *MI); + bool optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); bool optimizeCondBranch(MachineInstr *MI); bool optimizeCopyOrBitcast(MachineInstr *MI); bool optimizeCoalescableCopy(MachineInstr *MI); @@ -482,7 +483,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, } /// Optimize a select instruction. -bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { +bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs) { unsigned TrueOp = 0; unsigned FalseOp = 0; bool Optimizable = false; @@ -491,7 +493,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { return false; if (!Optimizable) return false; - if (!TII->optimizeSelect(MI)) + if (!TII->optimizeSelect(MI, LocalMIs)) return false; MI->eraseFromParent(); ++NumSelects; @@ -1072,6 +1074,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; + + // During this forward scan, at some point it needs to answer the question + // "given a pointer to an MI in the current BB, is it located before or + // after the current instruction?". + // To answer this, the following set keeps track of the MIs already seen + // during the scan; if an MI is not in the set, it is assumed to be located + // after. Newly created MIs have to be inserted in the set as well. SmallPtrSet<MachineInstr*, 16> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; @@ -1102,7 +1111,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || - (MI->isSelect() && optimizeSelect(MI))) { + (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { // MI is deleted. LocalMIs.erase(MI); Changed = true; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 89e1d11..ad59fc9 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -282,9 +282,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode. - const TargetSubtargetInfo &ST = - Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); - if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(), + if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), AntiDepMode, CriticalPathRCs)) return false; } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 06530b9..6d29b98 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -495,7 +495,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = RoundUpToAlignment(Offset, Align); MFI->setObjectOffset(i, -Offset); // Set the computed offset } @@ -504,7 +504,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; + Offset = RoundUpToAlignment(Offset, Align); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); @@ -537,7 +537,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = RoundUpToAlignment(Offset, Align); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -656,8 +656,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + Offset = RoundUpToAlignment(Offset, StackAlign); } // Update frame info to pretend that this is part of the stack... @@ -703,7 +702,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { /// register references and actual offsets. /// void PEI::replaceFrameIndices(MachineFunction &Fn) { - if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + if (!TFI.needsFrameIndexResolution(Fn)) return; // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; @@ -743,26 +743,19 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - bool StackGrowsDown = - TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + bool InsideCallSequence = false; + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { - // Remember how much SP has been adjusted to create the call - // frame. 
- int Size = I->getOperand(0).getImm(); - - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; - - SPAdj += Size; + InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); + SPAdj += TII.getSPAdjust(I); MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = std::prev(I); @@ -797,6 +790,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } + // TODO: This code should be commoned with the code for + // PATCHPOINT. There's no good reason for the difference in + // implementation other than historical accident. The only + // remaining difference is the unconditional use of the stack + // pointer as the base register. + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + assert((!MI->isDebugValue() || i == 0) && + "Frame indices can only appear as the first operand of a " + "DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(i + 1); + const unsigned refOffset = + TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), + Reg); + + Offset.setImm(Offset.getImm() + refOffset); + MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); + continue; + } + + // Frame allocations are target independent. Simply swap the index with + // the offset. + if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) { + assert(TFI->hasFP(Fn) && "frame alloc requires FP"); + MachineOperand &FI = MI->getOperand(i); + unsigned Reg; + int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg); + FI.ChangeToImmediate(FrameOffset); + continue; + } + // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register @@ -823,6 +847,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, break; } + // If we are looking at a call sequence, we need to keep track of + // the SP adjustment made by each instruction in the sequence. + // This includes both the frame setup/destroy pseudos (handled above), + // as well as other instructions that have side effects w.r.t. the SP. + // Note that this must come after eliminateFrameIndex, because + // if I itself referred to a frame index, we shouldn't count its own + // adjustment. + if (MI && InsideCallSequence) + SPAdj += TII.getSPAdjust(MI); + if (DoIncr && I != BB->end()) ++I; // Update register states. diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 122afd1..6b346f4 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -90,6 +90,7 @@ void RegAllocBase::allocatePhysRegs() { // Unused registers can appear when the spiller coalesces snippets. if (MRI->reg_nodbg_empty(VirtReg->reg)) { DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + aboutToRemoveInterval(*VirtReg); LIS->removeInterval(VirtReg->reg); continue; } @@ -139,6 +140,7 @@ void RegAllocBase::allocatePhysRegs() { assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + aboutToRemoveInterval(*SplitVirtReg); LIS->removeInterval(SplitVirtReg->reg); continue; } diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index bbd79cd..659b8f5 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -96,6 +96,9 @@ protected: // Use this group name for NamedRegionTimer.
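// Editorial note, not part of the patch: the aboutToRemoveInterval() hook
// added just below defaults to a no-op; later in this change RAGreedy
// overrides it to erase the interval from SetOfBrokenHints, so the set never
// holds a pointer to a LiveInterval that LiveIntervals has already deleted.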
static const char TimerGroupName[]; + /// Method called when the allocator is about to remove a LiveInterval. + virtual void aboutToRemoveInterval(LiveInterval &LI) {} + public: /// VerifyEnabled - True when -verify-regalloc is given. static bool VerifyEnabled; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8fc10b4..c621414 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -372,15 +372,23 @@ void RAFast::usePhysReg(MachineOperand &MO) { case regDisabled: break; case regReserved: - assert(TRI->isSuperRegister(PhysReg, Alias) && + // Either PhysReg is a subregister of Alias and we mark the + // whole register as free, or PhysReg is the superregister of + // Alias and we mark all the aliases as disabled before freeing + // PhysReg. + // In the latter case, since PhysReg was disabled, this means that + // its value is defined only by physical sub-registers. This check + // is performed by the assert of the default case in this loop. + // Note: The value of the superregister may only be partially + // defined, which is why regDisabled is a valid state for aliases. + assert((TRI->isSuperRegister(PhysReg, Alias) || + TRI->isSuperRegister(Alias, PhysReg)) && "Instruction is not using a subregister of a reserved register"); - // Leave the superregister in the working set. - PhysRegState[Alias] = regFree; - MO.getParent()->addRegisterKilled(Alias, TRI, true); - return; + // Fall through. case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. + PhysRegState[Alias] = regFree; MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -1023,8 +1031,7 @@ void RAFast::AllocateBasicBlock() { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (!MRI->isAllocatable(Reg)) continue; - definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? - regFree : regReserved); + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); continue; } LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 8ef5dcd..edc3294 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -296,6 +296,9 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + public: RAGreedy(); @@ -311,6 +314,7 @@ public: void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; + void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; @@ -378,6 +382,24 @@ private: SmallVirtRegSet &, unsigned); bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. + unsigned Reg; + /// Its currently assigned register. + /// In case of a physical register, Reg == PhysReg.
+ unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + typedef SmallVector<HintInfo, 4> HintsInfo; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); + void collectHintInfo(unsigned, HintsInfo &); }; } // end anonymous namespace @@ -453,7 +475,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (VRM->hasPhys(VirtReg)) { - Matrix->unassign(LIS->getInterval(VirtReg)); + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); return true; } // Unassigned virtreg is probably in the priority queue. @@ -2213,6 +2237,11 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, return PhysReg; } +void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { + // Do not keep invalid information around. + SetOfBrokenHints.remove(&LI); +} + void RAGreedy::initializeCSRCost() { // We use the larger one out of the command-line option and the value report // by TRI. @@ -2238,6 +2267,170 @@ void RAGreedy::initializeCSRCost() { CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); } +/// \brief Collect the hint info for \p Reg. +/// The results are stored into \p Out. +/// \p Out is not cleared before being populated. +void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { + for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { + if (!Instr.isFullCopy()) + continue; + // Look for the other end of the copy. + unsigned OtherReg = Instr.getOperand(0).getReg(); + if (OtherReg == Reg) { + OtherReg = Instr.getOperand(1).getReg(); + if (OtherReg == Reg) + continue; + } + // Get the current assignment. + unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + ? OtherReg + : VRM->getPhys(OtherReg); + // Push the collected information. + Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, + OtherPhysReg)); + } +} + +/// \brief Using the given \p List, compute the cost of the broken hints if +/// \p PhysReg was used. +/// \return The cost of \p List for \p PhysReg. +BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, + unsigned PhysReg) { + BlockFrequency Cost = 0; + for (const HintInfo &Info : List) { + if (Info.PhysReg != PhysReg) + Cost += Info.Freq; + } + return Cost; +} + +/// \brief Using the register assigned to \p VirtReg, try to recolor +/// all the live ranges that are copy-related with \p VirtReg. +/// The recoloring is then propagated to all the live-ranges that have +/// been recolored and so on, until no more copies can be coalesced or +/// it is not profitable. +/// For a given live range, profitability is determined by the sum of the +/// frequencies of the non-identity copies it would introduce with the old +/// and new register. +void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { + // We have a broken hint; check if it is possible to fix it by + // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted + // some register and PhysReg may be available for the other live-ranges. + SmallSet<unsigned, 4> Visited; + SmallVector<unsigned, 2> RecoloringCandidates; + HintsInfo Info; + unsigned Reg = VirtReg.reg; + unsigned PhysReg = VRM->getPhys(Reg); + // Start the recoloring algorithm from the input live-interval; it will + // then propagate to the ones that are copy-related with it.
+ Visited.insert(Reg); + RecoloringCandidates.push_back(Reg); + + DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' + << PrintReg(PhysReg, TRI) << ")\n"); + + do { + Reg = RecoloringCandidates.pop_back_val(); + + // We cannot recolor physical registers. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + assert(VRM->hasPhys(Reg) && "We have an unallocated variable!!"); + + // Get the live interval mapped with this virtual register to be able + // to check for the interference with the new color. + LiveInterval &LI = LIS->getInterval(Reg); + unsigned CurrPhys = VRM->getPhys(Reg); + // Check that the new color matches the register class constraints and + // that it is free for this live range. + if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || + Matrix->checkInterference(LI, PhysReg))) + continue; + + DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) + << ") is recolorable.\n"); + + // Gather the hint info. + Info.clear(); + collectHintInfo(Reg, Info); + // Check if recoloring the live-range will increase the cost of the + // non-identity copies. + if (CurrPhys != PhysReg) { + DEBUG(dbgs() << "Checking profitability:\n"); + BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); + BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); + DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() + << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); + if (OldCopiesCost < NewCopiesCost) { + DEBUG(dbgs() << "=> Not profitable.\n"); + continue; + } + // At this point, the cost is either lower or equal. If it is + // equal, we consider it profitable because it may expose + // more recoloring opportunities. + DEBUG(dbgs() << "=> Profitable.\n"); + // Recolor the live-range. + Matrix->unassign(LI); + Matrix->assign(LI, PhysReg); + } + // Push all copy-related live-ranges to keep reconciling the broken + // hints. + for (const HintInfo &HI : Info) { + if (Visited.insert(HI.Reg).second) + RecoloringCandidates.push_back(HI.Reg); + } + } while (!RecoloringCandidates.empty()); +} + +/// \brief Try to recolor broken hints. +/// Broken hints may be repaired by recoloring when an evicted variable +/// frees up a register for a larger live-range. +/// Consider the following example: +/// BB1: +/// a = +/// b = +/// BB2: +/// ... +/// = b +/// = a +/// Let us assume b gets split: +/// BB1: +/// a = +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// = a +/// Because of how the allocation works, b, c, and d may be assigned different +/// colors. Now, if a gets evicted later: +/// BB1: +/// a = +/// st a, SpillSlot +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// e = ld SpillSlot +/// = e +/// It is likely that we can assign the same register to b, c, and d, +/// getting rid of 2 copies. +void RAGreedy::tryHintsRecoloring() { + for (LiveInterval *LI : SetOfBrokenHints) { + assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + "Recoloring is possible only for virtual registers"); + // Some dead defs may be around (e.g., because of debug uses). + // Ignore those. + if (!VRM->hasPhys(LI->reg)) + continue; + tryHintRecoloring(*LI); + } +} + unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SmallVectorImpl<unsigned> &NewVRegs, SmallVirtRegSet &FixedRegisters, @@ -2274,8 +2467,18 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // queue.
The RS_Split ranges already failed to do this, and they should not // get a second chance until they have been split. if (Stage != RS_Split) - if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) + if (unsigned PhysReg = + tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { + unsigned Hint = MRI->getSimpleHint(VirtReg.reg); + // If VirtReg has a hint and that hint is broken, record this + // virtual register as a recoloring candidate for broken hints. + // Indeed, since we evicted a variable in its neighborhood, it is + // likely we can at least partially recolor some of the + // copy-related live-ranges. + if (Hint && Hint != PhysReg) + SetOfBrokenHints.insert(&VirtReg); return PhysReg; + } assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); @@ -2355,8 +2558,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { NextCascade = 1; IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. + SetOfBrokenHints.clear(); allocatePhysRegs(); + tryHintsRecoloring(); releaseMemory(); return true; } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index eb7e5633..77a42b3 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -126,7 +126,12 @@ private: void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); /// \brief Constructs an initial graph. - void initializeGraph(PBQPRAGraph &G); + void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); + + /// \brief Spill the given VReg. + void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. @@ -172,8 +177,6 @@ public: class Interference : public PBQPRAConstraint { private: -private: - typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey; typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache; @@ -308,7 +311,7 @@ private: PBQPRAGraph::NodeId MId, IMatrixCache &C) { const TargetRegisterInfo &TRI = - *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + *G.getMetadata().MF.getSubtarget().getRegisterInfo(); const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); @@ -342,7 +345,7 @@ public: void apply(PBQPRAGraph &G) override { MachineFunction &MF = G.getMetadata().MF; MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI; - CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo()); + CoalescerPair CP(*MF.getSubtarget().getRegisterInfo()); // Scan the machine function and add a coalescing cost whenever CoalescerPair // gives the Ok.
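The profitability test behind the getBrokenHintFreq()/tryHintRecoloring() additions above reduces to comparing the summed frequencies of the copies each candidate color would leave non-identity. A minimal standalone C++ sketch of that comparison, using a hypothetical HintEnd struct and plain uint64_t frequencies as stand-ins for the real HintsInfo/BlockFrequency types:

#include <cstdint>
#include <vector>

// Simplified stand-in for one end of a copy: the copy's block frequency and
// the physical register currently assigned to the other end.
struct HintEnd {
  uint64_t Freq;
  unsigned OtherPhysReg;
};

// Sum the frequencies of the copies that stay non-identity when the live
// range is colored with PhysReg (mirrors getBrokenHintFreq above).
static uint64_t brokenHintFreq(const std::vector<HintEnd> &Hints,
                               unsigned PhysReg) {
  uint64_t Cost = 0;
  for (const HintEnd &H : Hints)
    if (H.OtherPhysReg != PhysReg)
      Cost += H.Freq;
  return Cost;
}

// Recoloring from CurrPhys to NewPhys is taken when the new cost is lower or
// equal; equality is accepted because it may expose more recoloring
// opportunities, as the comment in tryHintRecoloring notes.
static bool profitableToRecolor(const std::vector<HintEnd> &Hints,
                                unsigned CurrPhys, unsigned NewPhys) {
  return brokenHintFreq(Hints, NewPhys) <= brokenHintFreq(Hints, CurrPhys);
}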
@@ -398,7 +401,7 @@ public: } PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); - G.setEdgeCosts(EId, std::move(Costs)); + G.updateEdgeCosts(EId, std::move(Costs)); } } } @@ -488,15 +491,21 @@ static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, return false; } -void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) { +void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, + Spiller &VRegSpiller) { MachineFunction &MF = G.getMetadata().MF; LiveIntervals &LIS = G.getMetadata().LIS; const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo &TRI = - *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + *G.getMetadata().MF.getSubtarget().getRegisterInfo(); + + std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); + + while (!Worklist.empty()) { + unsigned VReg = Worklist.back(); + Worklist.pop_back(); - for (auto VReg : VRegsToAlloc) { const TargetRegisterClass *TRC = MRI.getRegClass(VReg); LiveInterval &VRegLI = LIS.getInterval(VReg); @@ -531,6 +540,15 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) { VRegAllowed.push_back(PReg); } + // Check for vregs that have no allowed registers. These should be + // pre-spilled and the new vregs added to the worklist. + if (VRegAllowed.empty()) { + SmallVector<unsigned, 8> NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); + continue; + } + PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); // Tweak cost of callee saved registers, as using then force spilling and @@ -547,14 +565,40 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) { } } +void RegAllocPBQP::spillVReg(unsigned VReg, + SmallVectorImpl<unsigned> &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, + VirtRegMap &VRM, Spiller &VRegSpiller) { + + VRegsToAlloc.erase(VReg); + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM); + VRegSpiller.spill(LRE); + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + (void)TRI; + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " + << LRE.getParent().weight << ", New vregs: "); + + // Copy any newly inserted live intervals into the list of regs to + // allocate. 
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + const LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); + } + + DEBUG(dbgs() << ")\n"); +} + bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, const PBQP::Solution &Solution, VirtRegMap &VRM, Spiller &VRegSpiller) { MachineFunction &MF = G.getMetadata().MF; LiveIntervals &LIS = G.getMetadata().LIS; - const TargetRegisterInfo &TRI = - *MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); (void)TRI; // Set to true if we have any spills @@ -576,28 +620,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, assert(PReg != 0 && "Invalid preg selected."); VRM.assignVirt2Phys(VReg, PReg); } else { - VRegsToAlloc.erase(VReg); - SmallVector<unsigned, 8> NewSpills; - LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM); - VRegSpiller.spill(LRE); - - DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " - << LRE.getParent().weight << ", New vregs: "); - - // Copy any newly inserted live intervals into the list of regs to - // allocate. - for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); - I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); - assert(!LI.empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); - VRegsToAlloc.insert(LI.reg); - } - - DEBUG(dbgs() << ")\n"); - - // We need another round if spill intervals were added. - AnotherRoundNeeded |= !LRE.empty(); + // Spill VReg. If this introduces new intervals we'll need another round + // of allocation. + SmallVector<unsigned, 8> NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + AnotherRoundNeeded |= !NewVRegs.empty(); } } @@ -670,7 +697,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // If there are non-empty intervals allocate them using pbqp. 
if (!VRegsToAlloc.empty()) { - const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl(); + const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = llvm::make_unique<PBQPRAConstraintList>(); ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); @@ -686,7 +713,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); - initializeGraph(G); + initializeGraph(G, VRM, *VRegSpiller); ConstraintsRoot->apply(G); #ifndef NDEBUG @@ -699,7 +726,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" << GraphFileName << "\"\n"); - G.dumpToStream(OS); + G.dump(OS); } #endif @@ -719,6 +746,79 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } +namespace { +// A helper class for printing node and register info in a consistent way +class PrintNodeInfo { +public: + typedef PBQP::RegAlloc::PBQPRAGraph Graph; + typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; + + PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} + + void print(raw_ostream &OS) const { + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); + OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; + } + +private: + const Graph &G; + NodeId NId; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { + PR.print(OS); + return OS; +} +} // anonymous namespace + +void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { + for (auto NId : nodeIds()) { + const Vector &Costs = getNodeCosts(NId); + assert(Costs.getLength() != 0 && "Empty vector in graph."); + OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; + } + OS << '\n'; + + for (auto EId : edgeIds()) { + NodeId N1Id = getEdgeNode1Id(EId); + NodeId N2Id = getEdgeNode2Id(EId); + assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); + const Matrix &M = getEdgeCosts(EId); + assert(M.getRows() != 0 && "No rows in matrix."); + assert(M.getCols() != 0 && "No cols in matrix."); + OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; + OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; + OS << M << '\n'; + } +} + +void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } + +void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { + OS << "graph {\n"; + for (auto NId : nodeIds()) { + OS << " node" << NId << " [ label=\"" + << PrintNodeInfo(NId, *this) << "\\n" + << getNodeCosts(NId) << "\" ]\n"; + } + + OS << " edge [ len=" << nodeIds().size() << " ]\n"; + for (auto EId : edgeIds()) { + OS << " node" << getEdgeNode1Id(EId) + << " -- node" << getEdgeNode2Id(EId) + << " [ label=\""; + const Matrix &EdgeCosts = getEdgeCosts(EId); + for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { + OS << EdgeCosts.getRowAsVector(i) << "\\n"; + } + OS << "\" ]\n"; + } + OS << "}\n"; +} + FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { return new RegAllocPBQP(customPassID); } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index e0d1aa2..ab33672 100644 --- 
a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -47,6 +47,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Does this MF have different CSRs? + assert(TRI && "no register info set"); const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); if (Update || CSR != CalleeSaved) { // Build a CSRNum map. Every CSR alias gets an entry pointing to the last @@ -76,6 +77,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { /// registers filtered out. Volatile registers come first followed by CSR /// aliases ordered according to the CSR order specified by the target. void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { + assert(RC && "no register class given"); RCInfo &RCI = RegClass[RC->getID()]; // Raw register count, including all reserved regs. diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 2d2dc92..1e4cfe8 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -57,12 +58,12 @@ EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true)); -// Temporary flag to test critical edge unsplitting. +/// Temporary flag to test critical edge unsplitting. static cl::opt<bool> EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); -// Temporary flag to test global copy optimization. +/// Temporary flag to test global copy optimization. static cl::opt<cl::boolOrDefault> EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), @@ -86,6 +87,14 @@ namespace { AliasAnalysis *AA; RegisterClassInfo RegClassInfo; + /// A LaneMask to remember on which subregister live ranges we need to call + /// shrinkToUses() later. + unsigned ShrinkMask; + + /// True if the main range of the currently coalesced intervals should be + /// checked for smaller live intervals. + bool ShrinkMainRange; + /// \brief True if the coalescer should aggressively coalesce global copies /// in favor of keeping local copies. bool JoinGlobalCopies; @@ -111,7 +120,7 @@ namespace { /// Recursively eliminate dead defs in DeadDefs. void eliminateDeadDefs(); - /// LiveRangeEdit callback. + /// LiveRangeEdit callback for eliminateDeadDefs(). void LRE_WillEraseInstruction(MachineInstr *MI) override; /// Coalesce the LocalWorkList. @@ -124,16 +133,15 @@ namespace { /// copies that cannot yet be coalesced into WorkList. void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// Try to coalesce all copies in CurrList. Return - /// true if any progress was made. + /// Tries to coalesce all copies in CurrList. Returns true if any progress + /// was made. bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); - /// Attempt to join intervals corresponding to SrcReg/DstReg, - /// which are the src/dst of the copy instruction CopyMI. This returns - /// true if the copy was successfully coalesced away. If it is not - /// currently possible to coalesce this interval, but it may be possible if - /// other things get coalesced, then it returns true by reference in - /// 'Again'. 
+ /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the + /// src/dst of the copy instruction CopyMI. This returns true if the copy + /// was successfully coalesced away. If it is not currently possible to + /// coalesce this interval, but it may be possible if other things get + /// coalesced, then it returns true by reference in 'Again'. bool joinCopy(MachineInstr *TheCopy, bool &Again); /// Attempt to join these two intervals. On failure, this @@ -147,10 +155,23 @@ namespace { /// Attempt joining with a reserved physreg. bool joinReservedPhysReg(CoalescerPair &CP); - /// We found a non-trivially-coalescable copy. If - /// the source value number is defined by a copy from the destination reg - /// see if we can merge these two destination reg valno# into a single - /// value number, eliminating a copy. + /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. + /// Subranges in @p LI which only partially interfere with the desired + /// LaneMask are split as necessary. @p LaneMask are the lanes that + /// @p ToMerge will occupy in the coalesced register. @p LI has its subrange + /// lanemasks already adjusted to the coalesced register. + void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, + unsigned LaneMask, CoalescerPair &CP); + + /// Join the liveranges of two subregisters. Joins @p RRange into + /// @p LRange; @p RRange may be invalid afterwards. + void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + unsigned LaneMask, const CoalescerPair &CP); + + /// We found a non-trivially-coalescable copy. If the source value number is + /// defined by a copy from the destination reg, see if we can merge these two + /// destination reg valno# into a single value number, eliminating a copy. + /// This returns true if an interval was modified. bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); /// Return true if there are definitions of IntB @@ -162,6 +183,7 @@ namespace { /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. + /// This returns true if an interval was modified. bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); /// If the source of a copy is defined by a @@ -169,21 +191,21 @@ namespace { bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy); - /// Return true if a physreg copy should be joined. + /// Return true if a copy involving a physreg should be joined. bool canJoinPhys(const CoalescerPair &CP); - /// Replace all defs and uses of SrcReg to DstReg and - /// update the subregister number if it is not zero. If DstReg is a - /// physical register and the existing subregister number of the def / use - /// being updated is not zero, make sure to set it to the correct physical - /// subregister. + /// Replace all defs and uses of SrcReg to DstReg and update the subregister + /// number if it is not zero. If DstReg is a physical register and the + /// existing subregister number of the def / use being updated is not zero, + /// make sure to set it to the correct physical subregister. void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); /// Handle copies of undef values. - bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); + /// Returns true if @p CopyMI was a copy of an undef value and was eliminated.
+ bool eliminateUndefCopy(MachineInstr *CopyMI); public: - static char ID; // Class identification, replacement for typeinfo + static char ID; ///< Class identification, replacement for typeinfo RegisterCoalescer() : MachineFunctionPass(ID) { initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); } @@ -198,7 +220,7 @@ namespace { /// Implement the dump method. void print(raw_ostream &O, const Module* = nullptr) const override; }; -} /// end anonymous namespace +} // end anonymous namespace char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; @@ -232,11 +254,11 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, return true; } -// Return true if this block should be vacated by the coalescer to eliminate -// branches. The important cases to handle in the coalescer are critical edges -// split during phi elimination which contain only copies. Simple blocks that -// contain non-branches should also be vacated, but this can be handled by an -// earlier pass similar to early if-conversion. +/// Return true if this block should be vacated by the coalescer to eliminate +/// branches. The important cases to handle in the coalescer are critical edges +/// split during phi elimination which contain only copies. Simple blocks that +/// contain non-branches should also be vacated, but this can be handled by an +/// earlier pass similar to early if-conversion. static bool isSplitEdge(const MachineBasicBlock *MBB) { if (MBB->pred_size() != 1 || MBB->succ_size() != 1) return false; @@ -401,27 +423,11 @@ void RegisterCoalescer::eliminateDeadDefs() { nullptr, this).eliminateDeadDefs(DeadDefs); } -// Callback from eliminateDeadDefs(). void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { // MI may be in WorkList. Make sure we don't visit it. ErasedInstrs.insert(MI); } -/// We found a non-trivially-coalescable copy with IntA -/// being the source and IntB being the dest, thus this defines a value number -/// in IntB. If the source value number (in IntA) is defined by a copy from B, -/// see if we can merge these two pieces of B into a single value number, -/// eliminating a copy. For example: -/// -/// A3 = B0 -/// ... -/// B1 = A3 <- this copy -/// -/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 -/// value number to be replaced with B0 (which simplifies the B liveinterval). -/// -/// This returns true if an interval was modified. -/// bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { assert(!CP.isPartial() && "This doesn't work for partial copies."); @@ -433,6 +439,20 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + // We have a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a copy from B, see if we can + // merge these two pieces of B into a single value number, eliminating a copy. + // For example: + // + // A3 = B0 + // ... + // B1 = A3 <- this copy + // + // In this case, B0 can be extended to where the B1 copy lives, allowing the + // B1 value number to be replaced with B0 (which simplifies the B + // liveinterval). + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. 
LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); @@ -492,6 +512,16 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValS->valno) IntB.MergeValueNumberInto(BValNo, ValS->valno); + + // Do the same for the subregister segments. + for (LiveInterval::SubRange &S : IntB.subranges()) { + VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); + S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); + VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); + if (SubBValNo != SubValSNo) + S.MergeValueNumberInto(SubBValNo, SubValSNo); + } + DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the @@ -512,8 +542,6 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, return true; } -/// Return true if there are definitions of IntB -/// other than BValNo val# that can reach uses of AValno val# of IntA. bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, @@ -523,69 +551,75 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, if (LIS->hasPHIKill(IntA, AValNo)) return true; - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; + for (LiveRange::Segment &ASeg : IntA.segments) { + if (ASeg.valno != AValNo) continue; LiveInterval::iterator BI = - std::upper_bound(IntB.begin(), IntB.end(), AI->start); + std::upper_bound(IntB.begin(), IntB.end(), ASeg.start); if (BI != IntB.begin()) --BI; - for (; BI != IntB.end() && AI->end >= BI->start; ++BI) { + for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; - if (BI->start <= AI->start && BI->end > AI->start) + if (BI->start <= ASeg.start && BI->end > ASeg.start) return true; - if (BI->start > AI->start && BI->start < AI->end) + if (BI->start > ASeg.start && BI->start < ASeg.end) return true; } } return false; } -/// We found a non-trivially-coalescable copy with -/// IntA being the source and IntB being the dest, thus this defines a value -/// number in IntB. If the source value number (in IntA) is defined by a -/// commutable instruction and its other operand is coalesced to the copy dest -/// register, see if we can transform the copy into a noop by commuting the -/// definition. For example, -/// -/// A3 = op A2 B0<kill> -/// ... -/// B1 = A3 <- this copy -/// ... -/// = op A3 <- more uses -/// -/// ==> -/// -/// B2 = op B0 A2<kill> -/// ... -/// B1 = B2 <- now an identify copy -/// ... -/// = op B2 <- more uses -/// -/// This returns true if an interval was modified. -/// +/// Copy segments with value number @p SrcValNo from liverange @p Src to live +/// range @p Dst and use value number @p DstValNo there. +static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, + const LiveRange &Src, const VNInfo *SrcValNo) +{ + for (const LiveRange::Segment &S : Src.segments) { + if (S.valno != SrcValNo) + continue; + Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo)); + } +} + bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { - assert (!CP.isPhys()); - - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + assert(!CP.isPhys()); LiveInterval &IntA = - LIS->getInterval(CP.isFlipped() ?
CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = - LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + + // We found a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a commutable instruction and + // its other operand is coalesced to the copy dest register, see if we can + // transform the copy into a noop by commuting the definition. For example, + // + // A3 = op A2 B0<kill> + // ... + // B1 = A3 <- this copy + // ... + // = op A3 <- more uses + // + // ==> + // + // B2 = op B0 A2<kill> + // ... + // B1 = B2 <- now an identity copy + // ... + // = op B2 <- more uses // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || BValNo->def != CopyIdx) - return false; + assert(BValNo != nullptr && BValNo->def == CopyIdx); // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); - assert(AValNo && "COPY source not live"); - if (AValNo->isPHIDef() || AValNo->isUnused()) + assert(AValNo && !AValNo->isUnused() && "COPY source not live"); + if (AValNo->isPHIDef()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -652,8 +686,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MBB->insert(Pos, NewMI); MBB->erase(DefMI); } - unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); - NewMI->getOperand(OpIdx).setIsKill(); // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. // A = or A, B @@ -666,10 +698,13 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Update uses of IntA of the specific Val# with IntB. for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), - UE = MRI->use_end(); UI != UE;) { + UE = MRI->use_end(); + UI != UE; /* ++UI is below because of possible MI removal */) { MachineOperand &UseMO = *UI; - MachineInstr *UseMI = UseMO.getParent(); ++UI; + if (UseMO.isUndef()) + continue; + MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { // FIXME These don't have an instruction index. Not clear we have enough // info to decide whether to do this replacement or not. For now do it. @@ -678,7 +713,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, } SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); - if (US == IntA.end() || US->valno != AValNo) + assert(US != IntA.end() && "Use must be live"); + if (US->valno != AValNo) continue; // Kill flags are no longer accurate. They are recomputed after RA. 
UseMO.setIsKill(false); @@ -702,7 +738,16 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); + for (LiveInterval::SubRange &S : IntB.subranges()) { + VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); + if (!SubDVNI) + continue; + VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); + assert(SubBValNo->def == CopyIdx); + S.MergeValueNumberInto(SubDVNI, SubBValNo); + } + ErasedInstrs.insert(UseMI); LIS->RemoveMachineInstrFromMaps(UseMI); UseMI->eraseFromParent(); @@ -710,23 +755,82 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. - VNInfo *ValNo = BValNo; - ValNo->def = AValNo->def; - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo)); + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + if (IntB.hasSubRanges()) { + if (!IntA.hasSubRanges()) { + unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + IntA.createSubRangeFrom(Allocator, Mask, IntA); + } + SlotIndex AIdx = CopyIdx.getRegSlot(true); + for (LiveInterval::SubRange &SA : IntA.subranges()) { + VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); + assert(ASubValNo != nullptr); + + unsigned AMask = SA.LaneMask; + for (LiveInterval::SubRange &SB : IntB.subranges()) { + unsigned BMask = SB.LaneMask; + unsigned Common = BMask & AMask; + if (Common == 0) + continue; + + DEBUG( + dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common)); + unsigned BRest = BMask & ~AMask; + LiveInterval::SubRange *CommonRange; + if (BRest != 0) { + SB.LaneMask = BRest; + DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest)); + // Duplicate SubRange for the newly merged common lanes. + CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB); + } else { + // We can reuse the SB SubRange. + SB.LaneMask = Common; + CommonRange = &SB; + } + LiveRange RangeCopy(SB, Allocator); + + VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx); + assert(BSubValNo->def == CopyIdx); + BSubValNo->def = ASubValNo->def; + addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo); + AMask &= ~BMask; + } + if (AMask != 0) { + DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask)); + LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); + VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); + addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); + } + } } + + BValNo->def = AValNo->def; + addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - IntA.removeValNo(AValNo); + LIS->removeVRegDefAt(IntA, AValNo->def); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } -/// If the source of a copy is defined by a trivial -/// computation, replace the copy by rematerialize the definition. +/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just +/// defining a subregister.
+static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + "This code cannot handle physreg aliasing"); + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + // Return true if we define the full register or don't care about the value + // inside other subregisters. + if (Op.getSubReg() == 0 || Op.isUndef()) + return true; + } + return false; +} + bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy) { @@ -755,6 +859,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; + if (!definesFullReg(*DefMI, SrcReg)) + return false; bool SawStore = false; if (!DefMI->isSafeToMove(TII, AA, SawStore)) return false; @@ -825,12 +931,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, const TargetRegisterClass *NewRC = CP.getNewRC(); unsigned NewIdx = NewMI->getOperand(0).getSubReg(); - if (NewIdx) - NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); - else - NewRC = TRI->getCommonSubClass(NewRC, DefRC); - - assert(NewRC && "subreg chosen for remat incompatible with instruction"); + if (DefRC != nullptr) { + if (NewIdx) + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + } MRI->setRegClass(DstReg, NewRC); updateRegDefsUses(DstReg, DstReg, DstIdx); @@ -898,56 +1005,103 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, // The source interval can become smaller because we removed a use. LIS->shrinkToUses(&SrcInt, &DeadDefs); - if (!DeadDefs.empty()) + if (!DeadDefs.empty()) { + // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs + // to describe DstReg instead. + for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { + MachineInstr *UseMI = UseMO.getParent(); + if (UseMI->isDebugValue()) { + UseMO.setReg(DstReg); + DEBUG(dbgs() << "\t\tupdated: " << *UseMI); + } + } eliminateDeadDefs(); + } return true; } -/// ProcessImpicitDefs may leave some copies of <undef> -/// values, it only removes local variables. When we have a copy like: -/// -/// %vreg1 = COPY %vreg2<undef> -/// -/// We delete the copy and remove the corresponding value number from %vreg1. -/// Any uses of that value number are marked as <undef>. -bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, - const CoalescerPair &CP) { +bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { + // ProcessImplicitDefs may leave some copies of <undef> values; it only removes + // local variables. When we have a copy like: + // + // %vreg1 = COPY %vreg2<undef> + // + // We delete the copy and remove the corresponding value number from %vreg1. + // Any uses of that value number are marked as <undef>. + + // Note that we do not query CoalescerPair here but redo isMoveInstr as the + // CoalescerPair may have a new register class with adjusted subreg indices + // at this point.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); + SlotIndex Idx = LIS->getInstructionIndex(CopyMI); - LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg()); - if (SrcInt->liveAt(Idx)) - return false; - LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg()); - if (DstInt->liveAt(Idx)) + const LiveInterval &SrcLI = LIS->getInterval(SrcReg); + // CopyMI is undef iff SrcReg is not live before the instruction. + if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { + unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); + for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { + if ((SR.LaneMask & SrcMask) == 0) + continue; + if (SR.liveAt(Idx)) + return false; + } + } else if (SrcLI.liveAt(Idx)) return false; - // No intervals are live-in to CopyMI - it is undef. - if (CP.isFlipped()) - DstInt = SrcInt; - SrcInt = nullptr; + DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); - VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); - assert(DeadVNI && "No value defined in DstInt"); - DstInt->removeValNo(DeadVNI); + // Remove any DstReg segments starting at the instruction. + LiveInterval &DstLI = LIS->getInterval(DstReg); + SlotIndex RegIndex = Idx.getRegSlot(); + // Remove value or merge with previous one in case of a subregister def. + if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) { + VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); + DstLI.MergeValueNumberInto(VNI, PrevVNI); - // Find new undef uses. - for (MachineOperand &MO : MRI->reg_nodbg_operands(DstInt->reg)) { - if (MO.isDef() || MO.isUndef()) + // The affected subregister segments can be removed. + unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); + for (LiveInterval::SubRange &SR : DstLI.subranges()) { + if ((SR.LaneMask & DstMask) == 0) + continue; + + VNInfo *SVNI = SR.getVNInfoAt(RegIndex); + assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex)); + SR.removeValNo(SVNI); + } + DstLI.removeEmptySubRanges(); + } else + LIS->removeVRegDefAt(DstLI, RegIndex); + + // Mark uses as undef. + for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { + if (MO.isDef() /*|| MO.isUndef()*/) continue; - MachineInstr *MI = MO.getParent(); - SlotIndex Idx = LIS->getInstructionIndex(MI); - if (DstInt->liveAt(Idx)) + const MachineInstr &MI = *MO.getParent(); + SlotIndex UseIdx = LIS->getInstructionIndex(&MI); + unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + bool isLive; + if (UseMask != ~0u && DstLI.hasSubRanges()) { + isLive = false; + for (const LiveInterval::SubRange &SR : DstLI.subranges()) { + if ((SR.LaneMask & UseMask) == 0) + continue; + if (SR.liveAt(UseIdx)) { + isLive = true; + break; + } + } + } else + isLive = DstLI.liveAt(UseIdx); + if (isLive) continue; MO.setIsUndef(true); - DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); + DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); } return true; } -/// Replace all defs and uses of SrcReg to DstReg and update the subregister -/// number if it is not zero. If DstReg is a physical register and the existing -/// subregister number of the def / use being updated is not zero, make sure to -/// set it to the correct physical subregister. 
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { @@ -987,6 +1141,40 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx && MO.isDef()) MO.setIsUndef(!Reads); + // A subreg use of a partially undef (super) register may now be a completely + // undef use and then has to be marked that way. + if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) { + if (!DstInt->hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); + } + unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx); + bool IsUndef = true; + SlotIndex MIIdx = UseMI->isDebugValue() + ? LIS->getSlotIndexes()->getIndexBefore(UseMI) + : LIS->getInstructionIndex(UseMI); + SlotIndex UseIdx = MIIdx.getRegSlot(true); + for (LiveInterval::SubRange &S : DstInt->subranges()) { + if ((S.LaneMask & Mask) == 0) + continue; + if (S.liveAt(UseIdx)) { + IsUndef = false; + break; + } + } + if (IsUndef) { + MO.setIsUndef(true); + // We found that some subregister use is actually reading an undefined + // value. In some cases the whole vreg has become undefined at this + // point, so we have to potentially shrink the main range if the + // use was ending a live segment there. + LiveQueryResult Q = DstInt->Query(MIIdx); + if (Q.valueOut() == nullptr) + ShrinkMainRange = true; + } + } + if (DstIsPhys) MO.substPhysReg(DstReg, *TRI); else @@ -1002,29 +1190,23 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, } } -/// Return true if a copy involving a physreg should be joined. bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { - /// Always join simple intervals that are defined by a single copy from a - /// reserved register. This doesn't increase register pressure, so it is - /// always beneficial. + // Always join simple intervals that are defined by a single copy from a + // reserved register. This doesn't increase register pressure, so it is + // always beneficial. if (!MRI->isReserved(CP.getDstReg())) { DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); - if (CP.isFlipped() && JoinVInt.containsOneValue()) + if (JoinVInt.containsOneValue()) return true; - DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); return false; } -/// Attempt to join intervals corresponding to SrcReg/DstReg, -/// which are the src/dst of the copy instruction CopyMI. This returns true -/// if the copy was successfully coalesced away. If it is not currently -/// possible to coalesce this interval, but it may be possible if other -/// things get coalesced, then it returns true by reference in 'Again'. bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; @@ -1063,8 +1245,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } // Eliminate undefs. - if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { - DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); + if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { LIS->RemoveMachineInstrFromMaps(CopyMI); CopyMI->eraseFromParent(); return false; // Not coalescable.
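The subregister-liveness queries added to updateRegDefsUses() and eliminateUndefCopy() above share one pattern: derive a lane mask from the operand's subregister index, then ask whether any subrange covering those lanes is live at the use; if none is, the operand gets the <undef> flag. A simplified standalone sketch under assumed stand-in types (SubRange below is not the real LiveInterval::SubRange; lane masks are plain 32-bit values, as in this version of the code):

#include <cstdint>
#include <utility>
#include <vector>

struct SubRange {
  uint32_t LaneMask;                          // Lanes this subrange covers.
  std::vector<std::pair<int, int>> Segments;  // Half-open [start, end) slots.

  bool liveAt(int Idx) const {
    for (const auto &S : Segments)
      if (S.first <= Idx && Idx < S.second)
        return true;
    return false;
  }
};

// Returns true if any lane in UseMask is live at UseIdx. When this returns
// false, the use reads no defined lane and can be marked <undef>, which is
// the decision the loops above make.
static bool readsLiveLane(const std::vector<SubRange> &SubRanges,
                          uint32_t UseMask, int UseIdx) {
  for (const SubRange &SR : SubRanges) {
    if ((SR.LaneMask & UseMask) == 0)
      continue; // This subrange covers none of the lanes being read.
    if (SR.liveAt(UseIdx))
      return true;
  }
  return false;
}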
@@ -1076,12 +1257,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); - LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI)); + const SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + LiveQueryResult LRQ = LI.Query(CopyIdx); if (VNInfo *DefVNI = LRQ.valueDefined()) { VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no <undef> flag."); assert(ReadVNI != DefVNI && "Cannot read and define the same value."); LI.MergeValueNumberInto(DefVNI, ReadVNI); + + // Process subregister liveranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + LiveQueryResult SLRQ = S.Query(CopyIdx); + if (VNInfo *SDefVNI = SLRQ.valueDefined()) { + VNInfo *SReadVNI = SLRQ.valueIn(); + S.MergeValueNumberInto(SDefVNI, SReadVNI); + } + } DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } LIS->RemoveMachineInstrFromMaps(CopyMI); @@ -1124,6 +1315,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } + ShrinkMask = 0; + ShrinkMainRange = false; + // Okay, attempt to join these two intervals. On failure, this returns false. // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have @@ -1178,12 +1372,28 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + // Shrink subregister ranges if necessary. + if (ShrinkMask != 0) { + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & ShrinkMask) == 0) + continue; + DEBUG(dbgs() << "Shrink LaneUses (Lane " + << format("%04X", S.LaneMask) << ")\n"); + LIS->shrinkToUses(S, LI.reg); + } + } + if (ShrinkMainRange) { + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + LIS->shrinkToUses(&LI); + } + // SrcReg is guaranteed to be the register whose live interval that is // being merged. LIS->removeInterval(CP.getSrcReg()); // Update regalloc hint. - TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); + TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) @@ -1200,24 +1410,23 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return true; } -/// Attempt joining with a reserved physreg. bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { + unsigned DstReg = CP.getDstReg(); assert(CP.isPhys() && "Must be a physreg copy"); - assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); + assert(MRI->isReserved(DstReg) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); - assert(CP.isFlipped() && RHS.containsOneValue() && - "Invalid join with reserved register"); + assert(RHS.containsOneValue() && "Invalid join with reserved register"); // Optimization for reserved registers like ESP. We can only merge with a - // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). + // reserved physreg if RHS has a single value that is a copy of DstReg. // The live range of the reserved register will look like a set of dead defs // - we don't properly track the live range of reserved registers. 
// Deny any overlapping intervals. This depends on all the reserved // register live ranges to look like dead defs. - for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) if (RHS.overlaps(LIS->getRegUnit(*UI))) { DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); return false; } @@ -1229,7 +1438,46 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // defs are there. // Delete the identity copy. - MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + MachineInstr *CopyMI; + if (CP.isFlipped()) { + CopyMI = MRI->getVRegDef(RHS.reg); + } else { + if (!MRI->hasOneNonDBGUse(RHS.reg)) { + DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); + return false; + } + + MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); + CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); + const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intend to move up the def of + // the physical register, we also need to check for interfering uses. + SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + return false; + } + } + + // We're going to remove the copy which defines a physical reserved + // register, so remove its valno, etc. + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " + << CopyRegIdx << "\n"); + + LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + // Create a new dead def at the new def location. + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + LiveRange &LR = LIS->getRegUnit(*UI); + LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); + } + } + LIS->RemoveMachineInstrFromMaps(CopyMI); CopyMI->eraseFromParent(); @@ -1306,15 +1554,29 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { namespace { /// Track information about values in a single virtual register about to be /// joined. Objects of this class are always created in pairs - one for each -/// side of the CoalescerPair. +/// side of the CoalescerPair (or one for each lane of a side of the coalescer +/// pair). class JoinVals { - LiveInterval &LI; - - // Location of this register in the final joined register. - // Either CP.DstIdx or CP.SrcIdx. - unsigned SubIdx; - - // Values that will be present in the final live range. + /// Live range we work on. + LiveRange &LR; + /// (Main) register we work on. + const unsigned Reg; + + /// Reg (and therefore the values in this liverange) will end up as + /// subregister SubIdx in the coalesced register. Either CP.DstIdx or + /// CP.SrcIdx. + const unsigned SubIdx; + /// The LaneMask that this liverange will occupy in the coalesced register. + /// May be smaller than the lanemask produced by SubIdx when merging + /// subranges. + const unsigned LaneMask; + + /// This is true when joining sub register ranges, false when joining main + /// ranges. + const bool SubRangeJoin; + /// Whether the current LiveInterval tracks subregister liveness. + const bool TrackSubRegLiveness; + + /// Values that will be present in the final live range.
SmallVectorImpl<VNInfo*> &NewVNInfo; const CoalescerPair &CP; @@ -1322,75 +1584,75 @@ class JoinVals { SlotIndexes *Indexes; const TargetRegisterInfo *TRI; - // Value number assignments. Maps value numbers in LI to entries in NewVNInfo. - // This is suitable for passing to LiveInterval::join(). + /// Value number assignments. Maps value numbers in LI to entries in + /// NewVNInfo. This is suitable for passing to LiveInterval::join(). SmallVector<int, 8> Assignments; - // Conflict resolution for overlapping values. + /// Conflict resolution for overlapping values. enum ConflictResolution { - // No overlap, simply keep this value. + /// No overlap, simply keep this value. CR_Keep, - // Merge this value into OtherVNI and erase the defining instruction. - // Used for IMPLICIT_DEF, coalescable copies, and copies from external - // values. + /// Merge this value into OtherVNI and erase the defining instruction. + /// Used for IMPLICIT_DEF, coalescable copies, and copies from external + /// values. CR_Erase, - // Merge this value into OtherVNI but keep the defining instruction. - // This is for the special case where OtherVNI is defined by the same - // instruction. + /// Merge this value into OtherVNI but keep the defining instruction. + /// This is for the special case where OtherVNI is defined by the same + /// instruction. CR_Merge, - // Keep this value, and have it replace OtherVNI where possible. This - // complicates value mapping since OtherVNI maps to two different values - // before and after this def. - // Used when clobbering undefined or dead lanes. + /// Keep this value, and have it replace OtherVNI where possible. This + /// complicates value mapping since OtherVNI maps to two different values + /// before and after this def. + /// Used when clobbering undefined or dead lanes. CR_Replace, - // Unresolved conflict. Visit later when all values have been mapped. + /// Unresolved conflict. Visit later when all values have been mapped. CR_Unresolved, - // Unresolvable conflict. Abort the join. + /// Unresolvable conflict. Abort the join. CR_Impossible }; - // Per-value info for LI. The lane bit masks are all relative to the final - // joined register, so they can be compared directly between SrcReg and - // DstReg. + /// Per-value info for LI. The lane bit masks are all relative to the final + /// joined register, so they can be compared directly between SrcReg and + /// DstReg. struct Val { ConflictResolution Resolution; - // Lanes written by this def, 0 for unanalyzed values. + /// Lanes written by this def, 0 for unanalyzed values. unsigned WriteLanes; - // Lanes with defined values in this register. Other lanes are undef and - // safe to clobber. + /// Lanes with defined values in this register. Other lanes are undef and + /// safe to clobber. unsigned ValidLanes; - // Value in LI being redefined by this def. + /// Value in LI being redefined by this def. VNInfo *RedefVNI; - // Value in the other live range that overlaps this def, if any. + /// Value in the other live range that overlaps this def, if any. VNInfo *OtherVNI; - // Is this value an IMPLICIT_DEF that can be erased? - // - // IMPLICIT_DEF values should only exist at the end of a basic block that - // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be - // safely erased if they are overlapping a live value in the other live - // interval. - // - // Weird control flow graphs and incomplete PHI handling in - // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with - // longer live ranges. 
Such IMPLICIT_DEF values should be treated like - normal values. + /// Is this value an IMPLICIT_DEF that can be erased? + /// + /// IMPLICIT_DEF values should only exist at the end of a basic block that + /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be + /// safely erased if they are overlapping a live value in the other live + /// interval. + /// + /// Weird control flow graphs and incomplete PHI handling in + /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + /// longer live ranges. Such IMPLICIT_DEF values should be treated like + /// normal values. bool ErasableImplicitDef; - // True when the live range of this value will be pruned because of an - // overlapping CR_Replace value in the other live range. + /// True when the live range of this value will be pruned because of an + /// overlapping CR_Replace value in the other live range. bool Pruned; - // True once Pruned above has been computed. + /// True once Pruned above has been computed. bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), @@ -1400,30 +1662,75 @@ class JoinVals { bool isAnalyzed() const { return WriteLanes != 0; } }; - // One entry per value number in LI. + /// One entry per value number in LI. SmallVector<Val, 8> Vals; - unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef); - VNInfo *stripCopies(VNInfo *VNI); + /// Compute the bitmask of lanes actually written by DefMI. + /// Set Redef if there are any partial register definitions that depend on the + /// previous value of the register. + unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + + /// Find the ultimate value that VNI was copied from. + std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; + + bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; + + /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. + /// Return a conflict resolution when possible, but leave the hard cases as + /// CR_Unresolved. + /// Recursively calls computeAssignment() on this and Other, guaranteeing that + /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. + /// The recursion always goes upwards in the dominator tree, making loops + /// impossible. ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + + /// Compute the value assignment for ValNo in LR. + /// This may be called recursively by analyzeValue(), but never for a ValNo on + /// the stack. void computeAssignment(unsigned ValNo, JoinVals &Other); + + /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute + /// the extent of the tainted lanes in the block. + /// + /// Multiple values in Other.LR can be affected since partial redefinitions + /// can preserve previously tainted lanes. + /// + /// 1 %dst = VLOAD <-- Define all lanes in %dst + /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 + /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 + /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read + /// + /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) + /// entry to TaintedVals. + /// + /// Returns false if the tainted lanes extend beyond the basic block. bool taintExtent(unsigned, unsigned, JoinVals&, SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); - bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned); + + /// Return true if MI uses any of the given Lanes from Reg.
+ /// This does not include partial redefinitions of Reg. + bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const; + + /// Determine if ValNo is a copy of a value number in LR or Other.LR that will + /// be pruned: + /// + /// %dst = COPY %src + /// %src = COPY %dst <-- This value to be pruned. + /// %dst = COPY %src <-- This value is a copy of a pruned value. bool isPrunedValue(unsigned ValNo, JoinVals &Other); public: - JoinVals(LiveInterval &li, unsigned subIdx, - SmallVectorImpl<VNInfo*> &newVNInfo, - const CoalescerPair &cp, - LiveIntervals *lis, - const TargetRegisterInfo *tri) - : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis), - Indexes(LIS->getSlotIndexes()), TRI(tri), - Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums()) + JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask, + SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp, + LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, + bool TrackSubRegLiveness) + : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), + SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), + NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), + TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {} - /// Analyze defs in LI and compute a value mapping in NewVNInfo. + /// Analyze defs in LR and compute a value mapping in NewVNInfo. /// Returns false if any conflicts were impossible to resolve. bool mapValues(JoinVals &Other); @@ -1431,10 +1738,16 @@ public: /// Returns false if any conflicts were impossible to resolve. bool resolveConflicts(JoinVals &Other); - /// Prune the live range of values in Other.LI where they would conflict with - /// CR_Replace values in LI. Collect end points for restoring the live range + /// Prune the live range of values in Other.LR where they would conflict with + /// CR_Replace values in LR. Collect end points for restoring the live range /// after joining. - void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints); + void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints, + bool changeInstrs); + + /// Removes subranges starting at copies that get removed. This sometimes + /// happens when undefined subranges are copied around. These ranges contain + /// no useful information and can be removed. + void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask); /// Erase any machine instructions that have been coalesced away. /// Add erased instructions to ErasedInstrs. @@ -1448,13 +1761,11 @@ public: }; } // end anonymous namespace -/// Compute the bitmask of lanes actually written by DefMI. -/// Set Redef if there are any partial register definitions that depend on the -/// previous value of the register. -unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { +unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) + const { unsigned L = 0; for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) + if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef()) continue; L |= TRI->getSubRegIndexLaneMask( TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); @@ -1464,36 +1775,71 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { return L; } -/// Find the ultimate value that VNI was copied from.
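followCopyChain, declared above and defined just below, repeatedly hops from a full copy to its source value until it reaches a def that is not a copy; valuesIdentical then compares the two chain endpoints. A standalone sketch of that loop shape, with a plain map standing in for the liveness queries (hypothetical data, not the LLVM API):

    #include <cassert>
    #include <map>

    // Maps a value to the value it was fully copied from, if any: 3 <- 2 <- 1.
    static std::map<int, int> CopySource = {{3, 2}, {2, 1}};

    static int followCopyChain(int V) {
      for (auto It = CopySource.find(V); It != CopySource.end();
           It = CopySource.find(V))
        V = It->second; // hop to the copied-from value
      return V;         // first value not produced by a full copy
    }

    int main() {
      // Two values are treated as identical if their chains end at the
      // same originating def.
      assert(followCopyChain(3) == followCopyChain(2));
      assert(followCopyChain(3) == 1);
      return 0;
    }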
-VNInfo *JoinVals::stripCopies(VNInfo *VNI) { +std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( + const VNInfo *VNI) const { + unsigned Reg = this->Reg; + while (!VNI->isPHIDef()) { - MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def); + SlotIndex Def = VNI->def; + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No defining instruction"); if (!MI->isFullCopy()) + return std::make_pair(VNI, Reg); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return std::make_pair(VNI, Reg); + + const LiveInterval &LI = LIS->getInterval(SrcReg); + const VNInfo *ValueIn; + // No subrange involved. + if (!SubRangeJoin || !LI.hasSubRanges()) { + LiveQueryResult LRQ = LI.Query(Def); + ValueIn = LRQ.valueIn(); + } else { + // Query subranges. Pick the first matching one. + ValueIn = nullptr; + for (const LiveInterval::SubRange &S : LI.subranges()) { + // Transform lanemask to a mask in the joined live interval. + unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); + if ((SMask & LaneMask) == 0) + continue; + LiveQueryResult LRQ = S.Query(Def); + ValueIn = LRQ.valueIn(); + break; + } + } + if (ValueIn == nullptr) break; - unsigned Reg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - break; - LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def); - if (!LRQ.valueIn()) - break; - VNI = LRQ.valueIn(); + VNI = ValueIn; + Reg = SrcReg; } - return VNI; + return std::make_pair(VNI, Reg); +} + +bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, + const JoinVals &Other) const { + const VNInfo *Orig0; + unsigned Reg0; + std::tie(Orig0, Reg0) = followCopyChain(Value0); + if (Orig0 == Value1) + return true; + + const VNInfo *Orig1; + unsigned Reg1; + std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); + + // The values are equal if they are defined at the same place and use the + // same register. Note that we cannot compare VNInfos directly as some of + // them might be from a copy created in mergeSubRangeInto() while the other + // is from the original LiveInterval. + return Orig0->def == Orig1->def && Reg0 == Reg1; } -/// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. -/// Return a conflict resolution when possible, but leave the hard cases as -/// CR_Unresolved. -/// Recursively calls computeAssignment() on this and Other, guaranteeing that -/// both OtherVNI and RedefVNI have been analyzed and mapped before returning. -/// The recursion always goes upwards in the dominator tree, making loops -/// impossible. JoinVals::ConflictResolution JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { Val &V = Vals[ValNo]; assert(!V.isAnalyzed() && "Value has already been analyzed!"); - VNInfo *VNI = LI.getValNumInfo(ValNo); + VNInfo *VNI = LR.getValNumInfo(ValNo); if (VNI->isUnused()) { V.WriteLanes = ~0u; return CR_Keep; @@ -1503,46 +1849,56 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); + unsigned Lanes = SubRangeJoin ? 
1 : TRI->getSubRegIndexLaneMask(SubIdx); + V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); - bool Redef = false; - V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); - - // If this is a read-modify-write instruction, there may be more valid - // lanes than the ones written by this instruction. - // This only covers partial redef operands. DefMI may have normal use - // operands reading the register. They don't contribute valid lanes. - // - // This adds ssub1 to the set of valid lanes in %src: - // - // %src:ssub1<def> = FOO - // - // This leaves only ssub1 valid, making any other lanes undef: - // - // %src:ssub1<def,read-undef> = FOO %src:ssub2 - // - // The <read-undef> flag on the def operand means that old lane values are - // not important. - if (Redef) { - V.RedefVNI = LI.Query(VNI->def).valueIn(); - assert(V.RedefVNI && "Instruction is reading nonexistent value"); - computeAssignment(V.RedefVNI->id, Other); - V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; - } + assert(DefMI != nullptr); + if (SubRangeJoin) { + // We don't care about the lanes when joining subregister ranges. + V.ValidLanes = V.WriteLanes = 1; + } else { + bool Redef = false; + V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); + + // If this is a read-modify-write instruction, there may be more valid + // lanes than the ones written by this instruction. + // This only covers partial redef operands. DefMI may have normal use + // operands reading the register. They don't contribute valid lanes. + // + // This adds ssub1 to the set of valid lanes in %src: + // + // %src:ssub1<def> = FOO + // + // This leaves only ssub1 valid, making any other lanes undef: + // + // %src:ssub1<def,read-undef> = FOO %src:ssub2 + // + // The <read-undef> flag on the def operand means that old lane values are + // not important. + if (Redef) { + V.RedefVNI = LR.Query(VNI->def).valueIn(); + assert((TrackSubRegLiveness || V.RedefVNI) && + "Instruction is reading nonexistent value"); + if (V.RedefVNI != nullptr) { + computeAssignment(V.RedefVNI->id, Other); + V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; + } + } - // An IMPLICIT_DEF writes undef values. - if (DefMI->isImplicitDef()) { - // We normally expect IMPLICIT_DEF values to be live only until the end - // of their block. If the value is really live longer and gets pruned in - // another block, this flag is cleared again. - V.ErasableImplicitDef = true; - V.ValidLanes &= ~V.WriteLanes; + // An IMPLICIT_DEF writes undef values. + if (DefMI->isImplicitDef()) { + // We normally expect IMPLICIT_DEF values to be live only until the end + // of their block. If the value is really live longer and gets pruned in + // another block, this flag is cleared again. + V.ErasableImplicitDef = true; + V.ValidLanes &= ~V.WriteLanes; + } } } // Find the value in Other that overlaps VNI->def, if any. - LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def); + LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def); // It is possible that both values are defined by the same instruction, or // the values are PHIs defined in the same block. When that happens, the two @@ -1612,8 +1968,14 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { return CR_Replace; // Check for simple erasable conflicts. - if (DefMI->isImplicitDef()) + if (DefMI->isImplicitDef()) { + // We need the def for the subregister if there is nothing else live at the + // subrange at this point. 
+ if (TrackSubRegLiveness + && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0) + return CR_Replace; return CR_Erase; + } // Include the non-conflict where DefMI is a coalescable copy that kills // OtherVNI. We still want the copy erased and value numbers merged. @@ -1634,8 +1996,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // %other = COPY %ext // %this = COPY %ext <-- Erase this copy // - if (DefMI->isFullCopy() && !CP.isPartial() && - stripCopies(VNI) == stripCopies(V.OtherVNI)) + if (DefMI->isFullCopy() && !CP.isPartial() + && valuesIdentical(VNI, V.OtherVNI, Other)) return CR_Erase; // If the lanes written by this instruction were all undef in OtherVNI, it is @@ -1670,7 +2032,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // VNI is clobbering live lanes in OtherVNI, but there is still the // possibility that no instructions actually read the clobbered lanes. // If we're clobbering all the lanes in OtherVNI, at least one must be read. - // Otherwise Other.LI wouldn't be live here. + // Otherwise Other.LR wouldn't be live here. if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) return CR_Impossible; @@ -1691,9 +2053,6 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { return CR_Unresolved; } -/// Compute the value assignment for ValNo in LI. -/// This may be called recursively by analyzeValue(), but never for a ValNo on -/// the stack. void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { Val &V = Vals[ValNo]; if (V.isAnalyzed()) { @@ -1709,73 +2068,64 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; - DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' - << LI.getValNumInfo(ValNo)->def << " into " - << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' + DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@' + << LR.getValNumInfo(ValNo)->def << " into " + << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@' << V.OtherVNI->def << " --> @" << NewVNInfo[Assignments[ValNo]]->def << '\n'); break; case CR_Replace: - case CR_Unresolved: + case CR_Unresolved: { // The other value is going to be pruned if this join is successful. assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); - Other.Vals[V.OtherVNI->id].Pruned = true; + Val &OtherV = Other.Vals[V.OtherVNI->id]; + // We cannot erase an IMPLICIT_DEF if we don't have valid values for all + // its lanes. + if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness) + OtherV.ErasableImplicitDef = false; + OtherV.Pruned = true; + } // Fall through. default: // This value number needs to go in the final joined live range.
Assignments[ValNo] = NewVNInfo.size(); - NewVNInfo.push_back(LI.getValNumInfo(ValNo)); + NewVNInfo.push_back(LR.getValNumInfo(ValNo)); break; } } bool JoinVals::mapValues(JoinVals &Other) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { computeAssignment(i, Other); if (Vals[i].Resolution == CR_Impossible) { - DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i - << '@' << LI.getValNumInfo(i)->def << '\n'); + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); return false; } } return true; } -/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute -/// the extent of the tainted lanes in the block. -/// -/// Multiple values in Other.LI can be affected since partial redefinitions can -/// preserve previously tainted lanes. -/// -/// 1 %dst = VLOAD <-- Define all lanes in %dst -/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 -/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 -/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read -/// -/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) -/// entry to TaintedVals. -/// -/// Returns false if the tainted lanes extend beyond the basic block. bool JoinVals:: taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { - VNInfo *VNI = LI.getValNumInfo(ValNo); + VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); - // Scan Other.LI from VNI.def to MBBEnd. - LiveInterval::iterator OtherI = Other.LI.find(VNI->def); - assert(OtherI != Other.LI.end() && "No conflict?"); + // Scan Other.LR from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LR.find(VNI->def); + assert(OtherI != Other.LR.end() && "No conflict?"); do { // OtherI is pointing to a tainted value. Abort the join if the tainted // lanes escape the block. SlotIndex End = OtherI->end; if (End >= MBBEnd) { - DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':' << OtherI->valno->id << '@' << OtherI->start << '\n'); return false; } - DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':' << OtherI->valno->id << '@' << OtherI->start << " to " << End << '\n'); // A dead def is not a problem. @@ -1784,7 +2134,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, TaintExtent.push_back(std::make_pair(End, TaintedLanes)); // Check for another def in the MBB. - if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd) break; // Lanes written by the new def are no longer tainted. @@ -1796,10 +2146,8 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, return true; } -/// Return true if MI uses any of the given Lanes from Reg. -/// This does not include partial redefinitions of Reg. 
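The WriteLanes/ValidLanes bookkeeping used by analyzeValue and computeAssignment above is bitmask union and masking. A minimal standalone check of the read-modify-write case (assumed lane values, not real lane encodings):

    #include <cassert>

    int main() {
      // Lanes written by '%src:ssub1<def> = FOO' (a partial redefinition).
      unsigned WriteLanes = 0x0002;       // ssub1
      unsigned ValidLanesBefore = 0x0001; // ssub0 was already defined
      // Without <read-undef>, the old lanes stay valid through the redef,
      // mirroring V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes above.
      unsigned ValidLanes = WriteLanes | ValidLanesBefore;
      assert(ValidLanes == 0x0003);
      // With <def,read-undef>, only the freshly written lanes survive.
      unsigned ValidLanesReadUndef = WriteLanes;
      assert(ValidLanesReadUndef == 0x0002);
      return 0;
    }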
-bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, - unsigned Lanes) { +bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) const { if (MI->isDebugValue()) return false; for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { @@ -1815,16 +2163,19 @@ bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, } bool JoinVals::resolveConflicts(JoinVals &Other) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { Val &V = Vals[i]; assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); if (V.Resolution != CR_Unresolved) continue; - DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i - << '@' << LI.getValNumInfo(i)->def << '\n'); + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); + if (SubRangeJoin) + return false; + ++NumLaneConflicts; assert(V.OtherVNI && "Inconsistent conflict resolution."); - VNInfo *VNI = LI.getValNumInfo(i); + VNInfo *VNI = LR.getValNumInfo(i); const Val &OtherV = Other.Vals[V.OtherVNI->id]; // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the @@ -1854,7 +2205,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { unsigned TaintNum = 0; for(;;) { assert(MI != MBB->end() && "Bad LastMI"); - if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + if (usesLanes(MI, Other.Reg, Other.SubIdx, TaintedLanes)) { DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); return false; } @@ -1876,13 +2227,6 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { return true; } -// Determine if ValNo is a copy of a value number in LI or Other.LI that will -// be pruned: -// -// %dst = COPY %src -// %src = COPY %dst <-- This value to be pruned. -// %dst = COPY %src <-- This value is a copy of a pruned value. -// bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { Val &V = Vals[ValNo]; if (V.Pruned || V.PrunedComputed) @@ -1899,15 +2243,16 @@ bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { } void JoinVals::pruneValues(JoinVals &Other, - SmallVectorImpl<SlotIndex> &EndPoints) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { - SlotIndex Def = LI.getValNumInfo(i)->def; + SmallVectorImpl<SlotIndex> &EndPoints, + bool changeInstrs) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + SlotIndex Def = LR.getValNumInfo(i)->def; switch (Vals[i].Resolution) { case CR_Keep: break; case CR_Replace: { - // This value takes precedence over the value in Other.LI. - LIS->pruneValue(&Other.LI, Def, &EndPoints); + // This value takes precedence over the value in Other.LR. + LIS->pruneValue(Other.LR, Def, &EndPoints); // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF // instructions are only inserted to provide a live-out value for PHI // predecessors, so the instruction should simply go away once its value @@ -1916,34 +2261,37 @@ void JoinVals::pruneValues(JoinVals &Other, bool EraseImpDef = OtherV.ErasableImplicitDef && OtherV.Resolution == CR_Keep; if (!Def.isBlock()) { - // Remove <def,read-undef> flags. This def is now a partial redef. - // Also remove <def,dead> flags since the joined live range will - // continue past this instruction. 
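The flag rewrite described in the comment above amounts to a scan over the def operands of the instruction at Def, clearing the dead flag and resetting the undef flag. A standalone sketch with mocked operand flags (hypothetical Operand type, not MachineOperand):

    #include <cassert>
    #include <vector>

    struct Operand { unsigned Reg; bool IsDef, IsUndef, IsDead; };

    int main() {
      unsigned Reg = 5;
      bool EraseImpDef = false; // value was not an erasable IMPLICIT_DEF
      std::vector<Operand> Ops = {{5, true, true, true},
                                  {7, false, false, false}};
      for (Operand &MO : Ops)
        if (MO.IsDef && MO.Reg == Reg) {
          MO.IsUndef = EraseImpDef; // now a partial redef: drop <read-undef>
          MO.IsDead = false;        // joined range continues past this def
        }
      assert(!Ops[0].IsUndef && !Ops[0].IsDead);
      return 0;
    }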
- for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); - MO.isValid(); ++MO) - if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) { - MO->setIsUndef(EraseImpDef); - MO->setIsDead(false); + if (changeInstrs) { + // Remove <def,read-undef> flags. This def is now a partial redef. + // Also remove <def,dead> flags since the joined live range will + // continue past this instruction. + for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); + MO.isValid(); ++MO) { + if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) { + MO->setIsUndef(EraseImpDef); + MO->setIsDead(false); + } } + } // This value will reach instructions below, but we need to make sure // the live range also reaches the instruction at Def. if (!EraseImpDef) EndPoints.push_back(Def); } - DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def - << ": " << Other.LI << '\n'); + DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def + << ": " << Other.LR << '\n'); break; } case CR_Erase: case CR_Merge: if (isPrunedValue(i, Other)) { - // This value is ultimately a copy of a pruned value in LI or Other.LI. + // This value is ultimately a copy of a pruned value in LR or Other.LR. // We can no longer trust the value mapping computed by // computeAssignment(), the value that was originally copied could have // been replaced. - LIS->pruneValue(&LI, Def, &EndPoints); - DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at " - << Def << ": " << LI << '\n'); + LIS->pruneValue(LR, Def, &EndPoints); + DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at " + << Def << ": " << LR << '\n'); } break; case CR_Unresolved: @@ -1953,25 +2301,65 @@ void JoinVals::pruneValues(JoinVals &Other, } } +void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) +{ + // Look for values being erased. + bool DidPrune = false; + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + if (Vals[i].Resolution != CR_Erase) + continue; + + // Check subranges at the point where the copy will be removed. + SlotIndex Def = LR.getValNumInfo(i)->def; + for (LiveInterval::SubRange &S : LI.subranges()) { + LiveQueryResult Q = S.Query(Def); + + // If a subrange starts at the copy then an undefined value has been + // copied and we must remove that subrange value as well. + VNInfo *ValueOut = Q.valueOutOrDead(); + if (ValueOut != nullptr && Q.valueIn() == nullptr) { + DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask) + << " at " << Def << "\n"); + LIS->pruneValue(S, Def, nullptr); + DidPrune = true; + // Mark value number as unused. + ValueOut->markUnused(); + continue; + } + // If a subrange ends at the copy, then a value was copied but only + // partially used later. Shrink the subregister range appropriately. + if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { + DEBUG(dbgs() << "\t\tDead uses at sublane " + << format("%04X", S.LaneMask) << " at " << Def << "\n"); + ShrinkMask |= S.LaneMask; + } + } + } + if (DidPrune) + LI.removeEmptySubRanges(); +} + void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, SmallVectorImpl<unsigned> &ShrinkRegs) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it.
- SlotIndex Def = LI.getValNumInfo(i)->def; + SlotIndex Def = LR.getValNumInfo(i)->def; switch (Vals[i].Resolution) { - case CR_Keep: + case CR_Keep: { // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any // longer. The IMPLICIT_DEF instructions are only inserted by // PHIElimination to guarantee that all PHI predecessors have a value. if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; - // Remove value number i from LI. Note that this VNInfo is still present - // in NewVNInfo, so it will appear as an unused value number in the final - // joined interval. - LI.getValNumInfo(i)->markUnused(); - LI.removeValNo(LI.getValNumInfo(i)); - DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n'); + // Remove value number i from LR. + VNInfo *VNI = LR.getValNumInfo(i); + LR.removeValNo(VNI); + // Note that this VNInfo is reused and still referenced in NewVNInfo, + // make it appear like an unused value number. + VNI->markUnused(); + DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); // FALL THROUGH. + } case CR_Erase: { MachineInstr *MI = Indexes->getInstructionFromIndex(Def); @@ -1994,12 +2382,96 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, } } +void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + unsigned LaneMask, + const CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, + NewVNInfo, CP, LIS, TRI, true, true); + JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, + NewVNInfo, CP, LIS, TRI, true, true); + + // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) + // Conflicts should already be resolved so the mapping/resolution should + // always succeed. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) + llvm_unreachable("Can't join subrange although main ranges are compatible"); + if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) + llvm_unreachable("Can't join subrange although main ranges are compatible"); + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints, false); + RHSVals.pruneValues(LHSVals, EndPoints, false); + + LRange.verify(); + RRange.verify(); + + // Join RRange into LHS. + LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), + NewVNInfo); + + DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); + if (EndPoints.empty()) + return; + + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LRange << '\n'); + LIS->extendToIndices(LRange, EndPoints); +} + +void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, + const LiveRange &ToMerge, + unsigned LaneMask, CoalescerPair &CP) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + for (LiveInterval::SubRange &R : LI.subranges()) { + unsigned RMask = R.LaneMask; + // LaneMask of subregisters common to subrange R and ToMerge. + unsigned Common = RMask & LaneMask; + // There is nothing to do without common subregs. 
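The subrange split performed just below in mergeSubRangeInto is pure mask arithmetic: lanes shared with the incoming range become the common subrange, and the remainder keeps its own. A quick standalone check with assumed masks:

    #include <cassert>

    int main() {
      unsigned RMask = 0x000F;    // lanes covered by subrange R
      unsigned LaneMask = 0x0006; // lanes covered by the range merged in
      unsigned Common = RMask & LaneMask;  // joined with ToMerge
      unsigned LRest = RMask & ~LaneMask;  // stays in its own subrange
      assert(Common == 0x0006 && LRest == 0x0009);
      assert((Common | LRest) == RMask);   // no lane is lost by the split
      return 0;
    }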
+ if (Common == 0) + continue; + + DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common)); + // LaneMask of subregisters contained in the R range but not in ToMerge, + // they have to split into their own subrange. + unsigned LRest = RMask & ~LaneMask; + LiveInterval::SubRange *CommonRange; + if (LRest != 0) { + R.LaneMask = LRest; + DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest)); + // Duplicate SubRange for newly merged common stuff. + CommonRange = LI.createSubRangeFrom(Allocator, Common, R); + } else { + // Reuse the existing range. + R.LaneMask = Common; + CommonRange = &R; + } + LiveRange RangeCopy(ToMerge, Allocator); + joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); + LaneMask &= ~RMask; + } + + if (LaneMask != 0) { + DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); + LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); + } +} + bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); - JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); - JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); + bool TrackSubRegLiveness = MRI->tracksSubRegLiveness(); + JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, + TRI, false, TrackSubRegLiveness); + JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, + TRI, false, TrackSubRegLiveness); DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS @@ -2015,14 +2487,55 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { return false; // All clear, the live ranges can be merged. + if (RHS.hasSubRanges() || LHS.hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + + // Transform lanemasks from the LHS to masks in the coalesced register and + // create initial subranges if necessary. + unsigned DstIdx = CP.getDstIdx(); + if (!LHS.hasSubRanges()) { + unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); + // LHS must support subregs or we wouldn't be in this codepath. + assert(Mask != 0); + LHS.createSubRangeFrom(Allocator, Mask, LHS); + } else if (DstIdx != 0) { + // Transform LHS lanemasks to new register class if necessary. + for (LiveInterval::SubRange &R : LHS.subranges()) { + unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + R.LaneMask = Mask; + } + } + DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg()) + << ' ' << LHS << '\n'); + + // Determine lanemasks of RHS in the coalesced register and merge subranges. + unsigned SrcIdx = CP.getSrcIdx(); + if (!RHS.hasSubRanges()) { + unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); + mergeSubRangeInto(LHS, RHS, Mask, CP); + } else { + // Pair up subranges and merge. + for (LiveInterval::SubRange &R : RHS.subranges()) { + unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + mergeSubRangeInto(LHS, R, Mask, CP); + } + } + + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } // The merging algorithm in LiveInterval::join() can't handle conflicting // value mappings, so we need to remove any live ranges that overlap a // CR_Replace resolution. Collect a set of end points that can be used to // restore the live range after joining. 
SmallVector<SlotIndex, 8> EndPoints; - LHSVals.pruneValues(RHSVals, EndPoints); - RHSVals.pruneValues(LHSVals, EndPoints); + LHSVals.pruneValues(RHSVals, EndPoints, true); + RHSVals.pruneValues(LHSVals, EndPoints, true); // Erase COPY and IMPLICIT_DEF instructions. This may cause some external // registers to require trimming. @@ -2041,24 +2554,23 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { MRI->clearKillFlags(LHS.reg); MRI->clearKillFlags(RHS.reg); - if (EndPoints.empty()) - return true; + if (!EndPoints.empty()) { + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LHS << '\n'); + LIS->extendToIndices((LiveRange&)LHS, EndPoints); + } - // Recompute the parts of the live range we had to remove because of - // CR_Replace conflicts. - DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() - << " points: " << LHS << '\n'); - LIS->extendToIndices(LHS, EndPoints); return true; } -/// Attempt to join these two intervals. On failure, this returns false. bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } namespace { -// Information concerning MBB coalescing priority. +/// Information concerning MBB coalescing priority. struct MBBPriorityInfo { MachineBasicBlock *MBB; unsigned Depth; @@ -2069,10 +2581,10 @@ struct MBBPriorityInfo { }; } -// C-style comparator that sorts first based on the loop depth of the basic -// block (the unsigned), and then on the MBB number. -// -// EnableGlobalCopies assumes that the primary sort key is loop depth. +/// C-style comparator that sorts first based on the loop depth of the basic +/// block (the unsigned), and then on the MBB number. +/// +/// EnableGlobalCopies assumes that the primary sort key is loop depth. static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS) { // Deeper loops first @@ -2112,8 +2624,6 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); } -// Try joining WorkList copies starting from index From. -// Null out any successful joins. 
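The compareMBBPriority ordering documented above (deeper loops first, then block number) can be sketched independently of the pass. A stand-in comparator, assuming only that a depth and a number are available per block:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Simplified stand-in for MBBPriorityInfo: deeper loops sort first,
    // then block number keeps the order deterministic.
    struct BlockPriority { unsigned Depth; int Number; };

    static bool lessPriority(const BlockPriority &L, const BlockPriority &R) {
      if (L.Depth != R.Depth)
        return L.Depth > R.Depth; // deeper loops first
      return L.Number < R.Number; // then original block order
    }

    int main() {
      std::vector<BlockPriority> Blocks = {{0, 2}, {2, 5}, {2, 3}, {1, 1}};
      std::sort(Blocks.begin(), Blocks.end(), lessPriority);
      assert(Blocks[0].Number == 3 && Blocks[1].Number == 5);
      return 0;
    }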
bool RegisterCoalescer:: copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { bool Progress = false; @@ -2224,15 +2734,14 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &fn.getRegInfo(); TM = &fn.getTarget(); - TRI = TM->getSubtargetImpl()->getRegisterInfo(); - TII = TM->getSubtargetImpl()->getInstrInfo(); + const TargetSubtargetInfo &STI = fn.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); Loops = &getAnalysis<MachineLoopInfo>(); - - const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>(); if (EnableGlobalCopies == cl::BOU_UNSET) - JoinGlobalCopies = ST.useMachineScheduler(); + JoinGlobalCopies = STI.useMachineScheduler(); else JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); @@ -2264,9 +2773,24 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = InflateRegs[i]; if (MRI->reg_nodbg_empty(Reg)) continue; - if (MRI->recomputeRegClass(Reg, *TM)) { + if (MRI->recomputeRegClass(Reg)) { DEBUG(dbgs() << PrintReg(Reg) << " inflated to " << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); + LiveInterval &LI = LIS->getInterval(Reg); + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + if (MaxMask == 0) { + // If the inflated register class does not support subregisters anymore + // remove the subranges. + LI.clearSubRanges(); + } else { +#ifndef NDEBUG + // If subranges are still supported, then the same subregs should still + // be supported. + for (LiveInterval::SubRange &S : LI.subranges()) { + assert ((S.LaneMask & ~MaxMask) == 0); + } +#endif + } ++NumInflated; } } @@ -2277,7 +2801,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { return true; } -/// Implement the dump method. 
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { LIS->print(O, m); } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 6f8b337..76a7fef 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -36,8 +36,8 @@ static cl::opt<bool> StressSchedOpt( void SchedulingPriorityQueue::anchor() { } ScheduleDAG::ScheduleDAG(MachineFunction &mf) - : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()), - TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf), + : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()), + TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { #ifndef NDEBUG StressSched = StressSchedOpt; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index dece643..78bfd23 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -44,25 +44,24 @@ using namespace llvm; static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable use of AA during MI GAD construction")); + cl::desc("Enable use of AA during MI DAG construction")); static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, - cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction")); + cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - bool IsPostRAFlag, - bool RemoveKillFlags, + bool IsPostRAFlag, bool RemoveKillFlags, LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), - CanHandleTerminators(false), FirstDbgValue(nullptr) { + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(nullptr) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = mf.getSubtarget(); SchedModel.init(ST.getSchedModel(), &ST, TII); } @@ -253,7 +252,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); Alias.isValid(); ++Alias) { @@ -444,7 +443,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { int DefOp = Def->findRegisterDefOperandIdx(Reg); dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); SU->addPred(dep); } @@ -614,10 +613,10 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, } // Track current depth. (*Depth)++; - // Iterate over chain dependencies only. + // Iterate over memory dependencies only. 
for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); I != E; ++I) - if (I->isCtrl()) + if (I->isNormalMemoryOrBarrier()) iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); return *Depth; } @@ -644,11 +643,12 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); (*I)->addPred(Dep); } - // Now go through all the chain successors and iterate from them. - // Keep track of visited nodes. + + // Iterate recursively over all previously added memory chain + // successors. Keep track of visited nodes. for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), JE = (*I)->Succs.end(); J != JE; ++J) - if (J->isCtrl()) + if (J->isNormalMemoryOrBarrier()) iterateChainSucc (AA, MFI, SU, J->getSUnit(), ExitSU, &Depth, Visited); } @@ -742,7 +742,7 @@ void ScheduleDAGInstrs::initSUnits() { void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, PressureDiffs *PDiffs) { - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; @@ -891,7 +891,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // fall-through new_alias_chain: - // Chain all possibly aliasing memory references though SU. + // Chain all possibly aliasing memory references through SU. if (AliasChain) { unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) @@ -991,11 +991,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependence on alias chain, if needed. if (AliasChain) addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); - // But we also should check dependent instructions for the - // SU in question. 
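iterateChainSucc and adjustChainDeps in this hunk now follow only memory edges, with a visited set and a depth budget guarding the recursion. A generic standalone sketch of that traversal shape (it mirrors the control flow only, not the scheduler's dependency logic):

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> MemSuccs; };

    // Walks memory successors only, giving up once Depth exceeds a budget,
    // and never revisiting a node.
    static unsigned walk(Node *N, std::set<Node *> &Visited, unsigned *Depth,
                         unsigned Budget) {
      if (*Depth > Budget || !Visited.insert(N).second)
        return *Depth;
      ++(*Depth);
      for (Node *Succ : N->MemSuccs)
        walk(Succ, Visited, Depth, Budget);
      return *Depth;
    }

    int main() {
      Node A, B, C;
      A.MemSuccs = {&B, &C};
      B.MemSuccs = {&C}; // C is reachable twice but visited once
      std::set<Node *> Visited;
      unsigned Depth = 0;
      walk(&A, Visited, &Depth, 200);
      return Visited.size() == 3 ? 0 : 1;
    }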
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, - TrueMemOrderLatency); } + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk index 0e52ee3..9501ad9 100644 --- a/lib/CodeGen/SelectionDAG/Android.mk +++ b/lib/CodeGen/SelectionDAG/Android.mk @@ -22,6 +22,7 @@ codegen_selectiondag_SRC_FILES := \ SelectionDAGDumper.cpp \ SelectionDAGISel.cpp \ SelectionDAGPrinter.cpp \ + StatepointLowering.cpp \ TargetLowering.cpp \ TargetSelectionDAGInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 75e8167..fbedf2c 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + StatepointLowering.cpp ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a1291ed..6129401 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17,9 +17,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -303,6 +303,8 @@ namespace { SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitMLOAD(SDNode *N); + SDValue visitMSTORE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -325,6 +327,7 @@ namespace { SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -361,6 +364,28 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; + }; + + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumElem, + bool IsConstantSrc, bool UseVector); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. 
/// \return True if some memory operations were changed. @@ -378,12 +403,9 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - AttributeSet FnAttrs = - DAG.getMachineFunction().getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + auto *F = DAG.getMachineFunction().getFunction(); + ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || + F->hasFnAttribute(Attribute::MinSize); } /// Runs the dag combiner on all nodes in the work list @@ -444,7 +466,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { +CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } @@ -736,10 +758,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one @@ -757,10 +778,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + return SDValue(); } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one @@ -785,11 +805,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, N->dump(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump(&DAG); - dbgs() << " and " << NumTo-1 << " other values\n"; - for (unsigned i = 0, e = NumTo; i != e; ++i) - assert((!To[i].getNode() || - N->getValueType(i) == To[i].getValueType()) && - "Cannot combine value to value of different type!")); + dbgs() << " and " << NumTo-1 << " other values\n"); + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!"); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { @@ -874,8 +895,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 
ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, @@ -1096,8 +1117,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), @@ -1160,10 +1181,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Early exit if this basic block is in an optnone function. - AttributeSet FnAttrs = - DAG.getMachineFunction().getFunction()->getAttributes(); - if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeNone)) + if (DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::OptimizeNone)) return; // Add all the dag nodes to the worklist. @@ -1351,6 +1370,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSTORE: return visitMSTORE(N); } return SDValue(); } @@ -1475,7 +1496,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are - // rededundant. + // redundant. Changed = true; break; @@ -1504,7 +1525,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDValue Result; - // If we've change things around then replace token factor. + // If we've changed things around then replace token factor. if (Changed) { if (Ops.empty()) { // The entry token is the only possible outcome. @@ -1514,8 +1535,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } - // Don't add users to work list. - return CombineTo(N, Result, false); + // Add users to worklist if AA is enabled, since it may introduce + // a lot of new chained token factors while removing memory deps. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + return CombineTo(N, Result, UseAA /*add to worklist*/); } return Result; @@ -1541,8 +1565,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops @@ -1563,6 +1585,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS @@ -1714,8 +1738,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. 
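The discipline repeated through these visitors (fold when both operands are constant, otherwise canonicalize a constant operand onto the RHS) looks like this in a toy folder; a standalone sketch under assumed types, not SelectionDAG:

    #include <cassert>
    #include <utility>

    struct Value { bool IsConst; int C; };

    // Toy version of the visitADD discipline: fold (add c1, c2) -> c1+c2,
    // else move a constant operand to the RHS so later folds match once.
    static std::pair<Value, Value> visitAdd(Value N0, Value N1, bool &Folded,
                                            int &Result) {
      if (N0.IsConst && N1.IsConst) {
        Folded = true;
        Result = N0.C + N1.C;
        return {N0, N1};
      }
      Folded = false;
      if (N0.IsConst && !N1.IsConst)
        std::swap(N0, N1); // canonicalize constant to RHS
      return {N0, N1};
    }

    int main() {
      bool Folded; int R = 0;
      visitAdd({true, 2}, {true, 3}, Folded, R);
      assert(Folded && R == 5);
      auto P = visitAdd({true, 2}, {false, 0}, Folded, R);
      assert(!Folded && !P.first.IsConst && P.second.IsConst);
      return 0;
    }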
@@ -1725,6 +1747,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); @@ -1756,10 +1780,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); @@ -1786,10 +1810,6 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops @@ -1807,6 +1827,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) @@ -1826,6 +1848,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), VT); @@ -1890,8 +1914,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an SUB. 
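[Note between hunks] The hunks above and below all apply one mechanical refactor: the ConstantSDNode dyn_casts are sunk from the top of each visitor to their first use, so the early exits (vector folds, dead-flag rewrites) no longer pay for casts they never consume. Schematically, for a hypothetical visitor of this shape (a sketch of the pattern, not a verbatim hunk):

    SDValue DAGCombiner::visitBinOp(SDNode *N) {
      SDValue N0 = N->getOperand(0);
      SDValue N1 = N->getOperand(1);
      EVT VT = N0.getValueType();
      if (VT.isVector()) { /* vector folds may return early here */ }
      // The constant operands are only inspected once they are needed.
      ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
      ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
      if (N0C && N1C)
        return DAG.FoldConstantArithmetic(N->getOpcode(), VT, N0C, N1C);
      // ... remaining folds ...
    }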
@@ -1907,6 +1929,8 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, 0) -> x + no borrow + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -2055,8 +2079,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2066,6 +2088,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X @@ -2145,8 +2169,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2156,6 +2178,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } // fold (udiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c @@ -2197,11 +2221,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); // If we know the sign bits of both operands are zero, strength reduce to a @@ -2239,11 +2263,11 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) @@ -2522,6 +2546,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine @@ -2529,6 +2554,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::BSWAP || // Avoid infinite looping with PromoteIntBinOp. 
(N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || @@ -2662,11 +2688,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { @@ -2698,6 +2720,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS @@ -2707,6 +2731,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N1C && N1C->isAllOnesValue()) return N0; // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); @@ -2793,6 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // actually legal and isn't going to get expanded, else this is a false // optimisation. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getValueType(0), Load->getMemoryVT()); // Resize the constant to the same size as the original memory access before @@ -2838,6 +2864,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); @@ -2919,7 +2946,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2939,7 +2966,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2965,10 +2992,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); EVT LoadedVT = LN0->getMemoryVT(); + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { - EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, @@ -2983,7 +3011,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -3003,7 +3032,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, @@ -3313,9 +3341,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); // fold vector ops @@ -3407,6 +3432,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); } // fold (or c1, c2) -> c1|c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS @@ -3440,15 +3467,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); - if (!COR.getNode()) - return SDValue(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), COR); + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); @@ -3521,6 +3548,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X); + } + // See if this is some rotate idiom. 
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3790,9 +3828,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LHS, RHS, CC; - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); // fold vector ops @@ -3816,6 +3851,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS @@ -3830,6 +3867,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return RXOR; // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), @@ -4039,12 +4077,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; @@ -4061,8 +4098,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); - if (C.getNode()) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4072,6 +4108,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<<c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); // fold (shl 0, x) -> 0 @@ -4220,12 +4257,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; @@ -4234,6 +4270,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra c1, c2) -> (sra c1, c2) + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); // fold (sra 0, x) -> 0 @@ -4366,12 +4403,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = 
VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; @@ -4380,6 +4416,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl c1, c2) -> c1 >>u c2 + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); // fold (srl 0, x) -> 0 @@ -4608,13 +4645,47 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } + +/// \brief Generate Min/Max node +static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, + SDValue True, SDValue False, + ISD::CondCode CC, const TargetLowering &TLI, + SelectionDAG &DAG) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + switch (CC) { + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: { + unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: { + unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + default: + return SDValue(); + } +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); @@ -4622,12 +4693,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1 == N2) return N1; // fold (select true, X, Y) -> X + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); if (N0C && !N0C->isNullValue()) return N1; // fold (select false, X, Y) -> Y if (N0C && N0C->isNullValue()) return N2; // fold (select C, 1, X) -> (or C, X) + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) @@ -4639,6 +4712,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && TLI.getBooleanContents(false, false) == @@ -4687,6 +4761,28 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. 
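// Illustrative note, not part of the patch: given f32 values x and y,
// (select (setolt x, y), x, y) reaches combineMinNumMaxNum above with
// LHS == True, so the SETOLT case yields (fminnum x, y); with the select
// arms swapped, LHS == False and the same case yields (fmaxnum x, y).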
+ const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + SDValue FMinMax = + combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), + N1, N2, CC, TLI, DAG); + if (FMinMax) + return FMinMax; + } + if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) @@ -4771,6 +4867,166 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSTORE(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + SDValue Data = MST->getValue(); + SDLoc DL(N); + + // If the MSTORE data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and their operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() == ISD::SETCC) { + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); + + SDValue Chain = MST->getChain(); + SDValue Ptr = MST->getBasePtr(); + + EVT MemoryVT = MST->getMemoryVT(); + unsigned Alignment = MST->getOriginalAlignment(); + + // If the alignment is equal to the vector size, + // use half of it for the second part. + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MST->getAAInfo(), MST->getRanges()); + + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + MST->isTruncatingStore()); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, MST->getAAInfo(), + MST->getRanges()); + + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + MST->isTruncatingStore()); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + return SDValue(); +} + +SDValue DAGCombiner::visitMLOAD(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); + SDValue Mask = MLD->getMask(); + SDLoc DL(N); + + // If the MLOAD result requires splitting and the mask is provided by a + // SETCC, then split both nodes and their operands before legalization. 
This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Chain = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + EVT MemoryVT = MLD->getMemoryVT(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // If the alignment is equal to the vector size, + // use half of it for the second part. + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ISD::NON_EXTLOAD); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ISD::NON_EXTLOAD); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); + + SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { LoadRes, Chain }; + return DAG.getMergeValues(RetOps, DL); + } + return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4880,13 +5136,16 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } - - // Fold to a simpler select_cc - if (SCC.getOpcode() == ISD::SETCC) + } else if (SCC->getOpcode() == ISD::UNDEF) { + // When the condition is UNDEF, just return the first operand. This is + // coherent with the DAG creation; no setcc node is created in this case. + return N2; + } else if (SCC.getOpcode() == ISD::SETCC) { + // Fold to a simpler select_cc + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + } } // If we can fold this based on the true/false value, do so. @@ -5047,6 +5306,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, } } +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). 
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || + !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector<SDNode *, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. + EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), + Align, LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + CombineTo(N0.getNode(), Trunc, NewChain); + ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, + (ISD::NodeType)N->getOpcode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5113,17 +5468,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - // None of the supported targets knows how to perform load and sign extend - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -5140,6 +5496,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) // fold (sext ( extload x)) -> (sext (truncate (sextload x))) if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && @@ -5147,7 +5508,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5167,7 +5528,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { @@ -5403,17 +5764,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (load x)) -> (zext (truncate (zextload x))) - // None of the supported targets knows how to perform load and vector_zext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. 
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, @@ -5431,13 +5793,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { @@ -5474,7 +5841,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5636,7 +6003,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // scalars. 
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5666,7 +6033,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -5795,7 +6162,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } - if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5874,6 +6241,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) return SDValue(); + if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5999,7 +6369,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6015,7 +6385,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6318,19 +6688,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); - if (Res.getNode() != N) { - if (!LegalOperations || - TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) - return Res; - - // Folding it resulted in an illegal node, and it's too late to - // do that. Clean up the old node and forego the transformation. - // Ideally this won't happen very often, because instcombine - // and the earlier dagcombine runs (where illegal nodes are - // permitted) should have folded most of them already. - deleteAndRecombine(Res.getNode()); - } + // If we can't allow illegal operations, we need to check that this is just + // a fp -> int or int -> fp conversion and that the resulting operation will + // be legal. 
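// Illustrative note, not part of the patch: under these checks, a cast such
// as (i32 (bitcast (f32 1.0))) still folds to the i32 constant 0x3F800000
// when ISD::Constant is legal for i32, while a fold that would produce an
// illegal constant node is now rejected up front instead of being built and
// then deleted, as the removed code above used to do.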
+ if (!LegalOperations || + (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && + TLI.isOperationLegal(ISD::ConstantFP, VT)) || + (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && + TLI.isOperationLegal(ISD::Constant, VT))) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -6489,7 +6855,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (SrcEltVT.isFloatingPoint()) { // Convert the input float vector to a int vector where the elements are the // same sizes. - assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; @@ -6498,7 +6863,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Now we know the input is an integer vector. If the output is a FP type, // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { - assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); @@ -6549,8 +6913,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { - for (unsigned j = 0; j != NumOutputsPerInput; ++j) - Ops.push_back(DAG.getUNDEF(DstEltVT)); + Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -6575,6 +6938,133 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } +// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad +static SDValue performFaddFmulCombines(unsigned FusedOpcode, + bool Aggressive, + SDNode *N, + const TargetLowering &TLI, + SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + + // More folding opportunities when target permits. 
+ if (Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + } + + // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } + } + + return SDValue(); +} + +static SDValue performFsubFmulCombines(unsigned FusedOpcode, + bool Aggressive, + SDNode *N, + const TargetLowering &TLI, + SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + SDLoc SL(N); + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(FusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) + return DAG.getNode(FusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + + // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(FusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // More folding opportunities when target permits. + if (Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y, (fma u, v, (fneg z))) + if (N0.getOpcode() == FusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == FusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6714,23 +7204,55 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } // enable-unsafe-fp-math + if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) { + // Assume that if there is an fmad instruction, it should be used + // aggressively. 
+ if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG)) + return Fused; + } + // FADD -> FMA combines: if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); + if (!TLI.isOperationLegal(ISD::FMAD, VT)) { + // Don't form FMA if we are preferring FMAD. + if (SDValue Fused + = performFaddFmulCombines(ISD::FMA, + TLI.enableAggressiveFMAFusion(VT), + N, TLI, DAG)) { + return Fused; + } + } - // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), N0); + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), N0); + } + } } return SDValue(); @@ -6792,37 +7314,95 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) { + // Assume that if there is an fmad instruction, it should be used + // aggressively. + if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG)) + return Fused; + } + // FSUB -> FMA combines: if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - - // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) - // Note: Commutes FSUB operands. 
- if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - N1.getOperand(0)), - N1.getOperand(1), N0); - - // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && - N0.getOperand(0).getOpcode() == ISD::FMUL && - ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || - TLI.enableAggressiveFMAFusion(VT))) { - SDValue N00 = N0.getOperand(0).getOperand(0); - SDValue N01 = N0.getOperand(0).getOperand(1); - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, N00), N01, - DAG.getNode(ISD::FNEG, dl, VT, N1)); + if (!TLI.isOperationLegal(ISD::FMAD, VT)) { + // Don't form FMA if we are preferring FMAD. + + if (SDValue Fused + = performFsubFmulCombines(ISD::FMA, + TLI.enableAggressiveFMAFusion(VT), + N, TLI, DAG)) { + return Fused; + } + } + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } } } @@ -7104,6 +7684,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } + // Combine multiple FDIVs with the same divisor into multiple FMULs by the + // reciprocal. + // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) + // Notice that this is not always beneficial. One reason is that different + // targets may have different costs for FDIV and FMUL, so sometimes the cost + // of two FDIVs may be lower than the cost of one FDIV and two FMULs. 
Another reason + // is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL". + if (Options.UnsafeFPMath) { + // Skip if current node is a reciprocal. + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + SmallVector<SDNode *, 4> Users; + // Find all FDIV users of the same divisor. + for (SDNode::use_iterator UI = N1.getNode()->use_begin(), + UE = N1.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = UI.getUse().getUser(); + if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) + Users.push_back(User); + } + + if (TLI.combineRepeatedFPDivisors(Users.size())) { + SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 + SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { + if ((*I)->getOperand(0) != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, + (*I)->getOperand(0), Reciprocal); + DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + } + } + return SDValue(); + } + } + return SDValue(); } @@ -7122,7 +7740,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (DAG.getTarget().Options.UnsafeFPMath) { + if (DAG.getTarget().Options.UnsafeFPMath && + !TLI.isFsqrtCheap()) { // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { EVT VT = RV.getValueType(); @@ -7198,11 +7817,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || @@ -7251,11 +7870,11 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || @@ -7289,6 +7908,50 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return SDValue(); } +// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x +static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) + return SDValue(); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; + bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; + + // We can safely assume the conversion won't overflow the output range, + // because (for example) (uint8_t)18293.f is undefined behavior. + + // Since we can assume the conversion won't overflow, our decision as to + // whether the input will fit in the float should depend on the minimum + // of the input range and output range. + + // This means this is also safe for a signed input and unsigned output, since + // a negative input would lead to undefined behavior. 
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned ActualSize = std::min(InputSize, OutputSize); + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); + + // We can only fold away the float conversion if the input range can be + // represented exactly in the float range. + if (APFloat::semanticsPrecision(sem) >= ActualSize) { + if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { + unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOp, SDLoc(N), VT, Src); + } + if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); + if (SrcVT == VT) + return Src; + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + } + return SDValue(); +} + SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); @@ -7298,7 +7961,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { if (N0CFP) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { @@ -7310,7 +7973,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { if (N0CFP) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); - return SDValue(); + return FoldIntToFPToInt(N, DAG); } SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { @@ -7329,11 +7992,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { - // This is a value preserving truncation if both round's are. - bool IsTrunc = N->getConstantOperandVal(1) == 1 && - N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(IsTrunc)); + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value preserving truncation, it might + // introduce a tie in the second fp_round, that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding. + // Also, this is a value preserving truncation iff both fp_round's are. + if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -7391,7 +8059,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -8923,7 +9591,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* - if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 
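[Note between hunks] To make the precision test in FoldIntToFPToInt above concrete: IEEE single precision carries 24 significand bits, so a signed i16 round-trip (InputSize = OutputSize = 15, ActualSize = 15 <= 24) folds away, while a signed i32 round-trip (ActualSize = 31 > 24) may round and is rejected. A self-contained sketch of the same test, with a hypothetical helper name:

    #include <algorithm>
    // SignificandBits is APFloat::semanticsPrecision: 24 for f32, 53 for f64.
    bool roundTripIsExact(unsigned SrcBits, bool SrcSigned,
                          unsigned DstBits, bool DstSigned,
                          unsigned SignificandBits) {
      unsigned InputSize = SrcBits - (SrcSigned ? 1 : 0);
      unsigned OutputSize = DstBits - (DstSigned ? 1 : 0);
      return SignificandBits >= std::min(InputSize, OutputSize);
    }
    // roundTripIsExact(16, true, 16, true, 24) -> true  (fold to x)
    // roundTripIsExact(32, true, 32, true, 24) -> false (keep the conversions)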
@@ -9070,9 +9738,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + // The narrowing should be profitable, the load/store operation should be + // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && - !(TLI.isOperationLegalOrCustom(Opc, NewVT) && - TLI.isNarrowingProfitable(VT, NewVT))) { + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } @@ -9272,36 +9943,139 @@ struct BaseIndexOffset { } }; -/// Holds a pointer to an LSBaseSDNode as well as information on where it -/// is located in a sequence of memory operations connected by a chain. -struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } - // Ptr to the mem node. - LSBaseSDNode *MemNode; - // Offset from the base ptr. - int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; -}; +bool DAGCombiner::MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, + unsigned NumElem, bool IsConstantSrc, bool UseVector) { + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned EarliestNodeUsed = 0; + + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // The earliest Node in the DAG. + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { + // A vector store with a constant source implies that the constant is + // zero; we only handle merging stores of constant zeros because the zero + // can be materialized without a load. + // It may be beneficial to loosen this restriction to allow non-zero + // store merging. + StoredVal = DAG.getConstant(0, Ty); + } else { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElem ; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All of the operands of a BUILD_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); + } + } else { + // We should always use a vector store when merging extracted vector + // elements, so this path implies a store of constants. 
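The reworked loop condition in the ReduceLoadOpStoreWidth hunk above additionally rejects candidate widths whose in-memory size differs from the value size (i1/i2/i4 all store as a byte). A worked example of the width search, with nextPowerOf2 standing in for llvm::NextPowerOf2 (next power of two strictly greater than its input):

    #include <cassert>
    #include <cstdint>

    static uint64_t nextPowerOf2(uint64_t v) {
      uint64_t p = 1;
      while (p <= v) p <<= 1;
      return p;
    }

    int main() {
      // An i32 load/op/store where only bits [8,17] of the value change:
      // MSB = 17, ShAmt = 8, so start from NextPowerOf2(17 - 8) = 16.
      unsigned MSB = 17, ShAmt = 8;
      unsigned NewBW = nextPowerOf2(MSB - ShAmt);
      assert(NewBW == 16); // i16: store size (16) == NewBW, so it passes the
                           // new check; i16 legality/profitability on the
                           // target then decides whether the loop stops here.
    }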
+ assert(IsConstantSrc && "Merged vector elements should use vector store"); + + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ? (NumElem - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt <<= ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt |= C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + llvm_unreachable("Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, StoreTy); + } + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the first store with the new store + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. + // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && - !IsLoadSrc) + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; // Only look at ends of store sequences. @@ -9443,7 +10217,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; // Store the constants into memory as one consecutive store. 
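The packing loop above is endian-sensitive: on a little-endian target the highest-addressed element must end up in the high bits, so the loop walks the stores in reverse. A self-contained worked example for four i8 constants, with uint32_t standing in for APInt:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t elts[4] = {0x11, 0x22, 0x33, 0x44}; // store order in memory
      uint32_t storeInt = 0;
      for (int i = 0; i < 4; ++i) {
        unsigned idx = 4 - 1 - i;  // IsLE: visit highest-addressed first
        storeInt <<= 8;
        storeInt |= elts[idx];
      }
      // One little-endian i32 store of 0x44332211 writes the bytes
      // 11 22 33 44 at increasing addresses, same as the four i8 stores.
      printf("0x%08x\n", storeInt);
    }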
- if (!IsLoadSrc) { + if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; @@ -9492,85 +10266,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; - // Make sure we have something to merge. - if (NumElem < 2) - return false; - - unsigned EarliestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; - } + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - SDLoc DL(StoreNodes[0].MemNode); + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecEltSrc) { + unsigned NumElem = 0; + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a vector. + // It should be possible to handle mixed sources, but load sources need + // more careful handling (see the block of code below that handles + // consecutive loads). + if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; - SDValue StoredVal; - if (UseVector) { // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); - StoredVal = DAG.getConstant(0, Ty); - } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); - - // Construct a single integer constant which is made of the smaller - // constant inputs. - bool IsLE = TLI.isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ?(NumElem - 1 - i) : i; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); - SDValue Val = St->getValue(); - StoreInt<<=ElementSizeBytes*8; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt|=C->getAPIntValue().zext(StoreBW); - } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); - } else { - assert(false && "Invalid constant element type"); - } - } - - // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); - } - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - false, false, - FirstInChain->getAlignment()); - - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); - // Erase all other stores. 
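As a source-level analogy for the extract_vector_elt path introduced here (the real combine works on SDNodes; Vec4 is a hypothetical stand-in for a legal vector type):

    #include <cassert>
    #include <cstring>

    struct Vec4 { float v[4]; };

    // Four scalar stores, each fed by an extract of the same vector value.
    static void storeElementwise(float *p, const Vec4 &x) {
      p[0] = x.v[0]; p[1] = x.v[1]; p[2] = x.v[2]; p[3] = x.v[3];
    }

    // What the merge produces instead: one wide, vector-typed store.
    static void storeMerged(float *p, const Vec4 &x) {
      std::memcpy(p, x.v, sizeof x.v);
    }

    int main() {
      Vec4 x = {{1, 2, 3, 4}};
      float a[4], b[4];
      storeElementwise(a, x);
      storeMerged(b, x);
      assert(std::memcmp(a, b, sizeof a) == 0);
    }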
- for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty)) + NumElem = i + 1; } - return true; + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + false, true); } // Below we handle the case of multiple consecutive stores that @@ -9668,9 +10390,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) LastLegalIntegerType = i+1; } } @@ -10108,7 +10830,8 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( if (ResultVT.bitsGT(VecEltVT)) { // If the result type of vextract is wider than the load, then issue an // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, + VecEltVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; Load = DAG.getExtLoad( @@ -10474,6 +11197,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) return SDValue(); + // Just because the floating-point vector type is legal does not necessarily + // mean that the corresponding integer vector type is. + if (!isTypeLegal(NVT)) + return SDValue(); + SmallVector<SDValue, 8> Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -10519,26 +11247,37 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); SDValue VecIn1, VecIn2; + bool UsesZeroVector = false; for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (Op.getOpcode() == ISD::UNDEF) continue; + + // See if we can combine this build_vector into a blend with a zero vector. + if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + (Op.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { + UsesZeroVector = true; + continue; + } // If this input is something other than a EXTRACT_VECTOR_ELT with a // constant index, bail out. 
- if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1))) { VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } // We allow up to two distinct input vectors. - SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + SDValue ExtractedFromVec = Op.getOperand(0); if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode()) { + } else if (!VecIn2.getNode() && !UsesZeroVector) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. @@ -10549,55 +11288,93 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { + unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + unsigned Opcode = N->getOperand(i).getOpcode(); + if (Opcode == ISD::UNDEF) { Mask.push_back(-1); continue; } + // Operands can also be zero. + if (Opcode != ISD::EXTRACT_VECTOR_ELT) { + assert(UsesZeroVector && + (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && + "Unexpected node found!"); + Mask.push_back(NumInScalars+i); + continue; + } + // If extracting from the first vector, just use the index directly. SDValue Extract = N->getOperand(i); SDValue ExtVal = Extract.getOperand(1); + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (Extract.getOperand(0) == VecIn1) { - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - if (ExtIndex > VT.getVectorNumElements()) - return SDValue(); - Mask.push_back(ExtIndex); continue; } - // Otherwise, use InIdx + VecSize - unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - Mask.push_back(Idx+NumInScalars); + // Otherwise, use InIdx + InputVecSize + Mask.push_back(InNumElements + ExtIndex); } + // Avoid introducing illegal shuffles with zero. + if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + return SDValue(); + // We can't generate a shuffle node with mismatched input and output types. // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { - // We don't support shuffeling between TWO values of different types. - if (VecIn2.getNode()) + // If the input vector type has a different base type to the output + // vector type, bail out. + EVT VTElemType = VT.getVectorElementType(); + if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || + (VecIn2.getNode() && + (VecIn2.getValueType().getVectorElementType() != VTElemType))) return SDValue(); + // If the input vector is too small, widen it. // We only support widening of vectors which are half the size of the // output registers. For example XMM->YMM widening on X86 with AVX. - if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) - return SDValue(); + EVT VecInT = VecIn1.getValueType(); + if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { + // If we only have one small input, widen it by adding undef values. + if (!VecIn2.getNode()) + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, + DAG.getUNDEF(VecIn1.getValueType())); + else if (VecIn1.getValueType() == VecIn2.getValueType()) { + // If we have two small inputs of the same type, try to concat them. 
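For the zero-blend case above, the mask construction deserves a concrete example. A sketch for build_vector (extract V,0), 0.0, (extract V,2), 0.0 with NumInScalars = 4: any lane numbered NumInScalars+i selects from the second shuffle operand, which becomes an all-zero vector:

    #include <cstdio>
    #include <vector>

    int main() {
      const int NumInScalars = 4;
      // -1 marks a constant-zero operand; otherwise the extract index.
      int extractIdx[4] = {0, -1, 2, -1};
      std::vector<int> mask;
      for (int i = 0; i < NumInScalars; ++i)
        mask.push_back(extractIdx[i] >= 0 ? extractIdx[i]
                                          : NumInScalars + i);
      for (int m : mask)
        printf("%d ", m);   // 0 5 2 7 -> shuffle(V, zerovec, <0,5,2,7>)
      printf("\n");
    }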
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); + VecIn2 = SDValue(nullptr, 0); + } else + return SDValue(); + } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { + // If the input vector is too large, try to split it. + // We don't support having two input vectors that are too large. + if (VecIn2.getNode()) + return SDValue(); - // If the input vector type has a different base type to the output - // vector type, bail out. - if (VecIn1.getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); + if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) + return SDValue(); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // Try to replace VecIn1 with two extract_subvectors + // No need to update the masks, they should still be correct. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, TLI.getVectorIdxTy())); + UsesZeroVector = false; + } else + return SDValue(); } - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + if (UsesZeroVector) + VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : + DAG.getConstantFP(0.0, VT); + else + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); // Check that we were able to transform all incoming values to the same // type. @@ -10656,36 +11433,56 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) - if (N->getNumOperands() == 2 && - N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && - N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { SmallVector<SDValue, 8> Opnds; - unsigned BuildVecNumElts = N0.getNumOperands(); - - EVT SclTy0 = N0.getOperand(0)->getValueType(0); - EVT SclTy1 = N1.getOperand(0)->getValueType(0); - if (SclTy0.isFloatingPoint()) { - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); - } else { + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. - EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N0.getOperand(i))); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N1.getOperand(i))); + // operand types. Get the smallest type and truncate all operands to it. 
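For the too-wide input case in the hunk above (VecIn1 replaced by two EXTRACT_SUBVECTORs), a small check of why the mask needs no update: a shuffle over (low half, high half) sees exactly the original wide-vector lane numbering:

    #include <cassert>

    int main() {
      int wide[8] = {10, 11, 12, 13, 14, 15, 16, 17};
      int low[4]  = {10, 11, 12, 13};  // extract_subvector V, 0
      int high[4] = {14, 15, 16, 17};  // extract_subvector V, NumElts
      for (int idx = 0; idx < 8; ++idx) {
        int viaHalves = idx < 4 ? low[idx] : high[idx - 4];
        assert(viaHalves == wide[idx]); // mask indices stay valid
      }
    }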
+ bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); + } + + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back(DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back(Op.getOperand(i)); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } } + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } @@ -10881,7 +11678,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } -// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -10895,6 +11693,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + std::all_of(SVN->getMask().begin() + NumElemsPerConcat, + SVN->getMask().end(), [](int i) { return i == -1; })) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), + ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { @@ -10993,7 +11803,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If it is a splat, check if the argument vector is another splat or a - // build_vector with all scalar elements the same. + // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -11030,6 +11840,18 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Splat of <x, x, x, x>, return <x, x, x, x> if (AllSame) return N0; + + // Canonicalize any other splat as a build_vector. + const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + SmallVector<SDValue, 8> Ops(NumElts, Splatted); + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. 
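The integer path of the concat_vectors fold earlier in this hunk truncates every BUILD_VECTOR operand to the smallest scalar type seen (MinVT). A sketch with two two-element BUILD_VECTORs whose operands are i32 and i16, so MinVT = i16:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int32_t> bv0 = {0x12345, 0x23456}; // i32 operands
      std::vector<int16_t> bv1 = {0x111, 0x222};     // i16 operands
      std::vector<int16_t> flat;                     // one merged BUILD_VECTOR
      for (int32_t v : bv0)
        flat.push_back(static_cast<int16_t>(v));     // ISD::TRUNCATE to MinVT
      for (int16_t v : bv1)
        flat.push_back(v);
      for (int16_t v : flat)
        printf("0x%x ", static_cast<uint16_t>(v));   // 0x2345 0x3456 0x111 0x222
      printf("\n");
    }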
+ if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; } } @@ -11050,121 +11872,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } - // If this shuffle node is simply a swizzle of another shuffle node, - // then try to simplify it. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N1.getOpcode() == ISD::UNDEF) { - - ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(OtherSV->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); - - SmallVector<int, 4> Mask; - // Compute the combined shuffle mask. - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - assert(Idx < (int)NumElts && "Index references undef operand"); - // Next, this index comes from the first value, which is the incoming - // shuffle. Adopt the incoming index. - if (Idx >= 0) - Idx = OtherSV->getMaskElt(Idx); - Mask.push_back(Idx); - } - - // Check if all indices in Mask are Undef. In case, propagate Undef. - bool isUndefMask = true; - for (unsigned i = 0; i != NumElts && isUndefMask; ++i) - isUndefMask &= Mask[i] < 0; - - if (isUndefMask) - return DAG.getUNDEF(VT); - - bool CommuteOperands = false; - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { - // To be valid, the combine shuffle mask should only reference elements - // from one of the two vectors in input to the inner shufflevector. - bool IsValidMask = true; - for (unsigned i = 0; i != NumElts && IsValidMask; ++i) - // See if the combined mask only reference undefs or elements coming - // from the first shufflevector operand. - IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; - - if (!IsValidMask) { - IsValidMask = true; - for (unsigned i = 0; i != NumElts && IsValidMask; ++i) - // Check that all the elements come from the second shuffle operand. - IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; - CommuteOperands = IsValidMask; - } - - // Early exit if the combined shuffle mask is not valid. - if (!IsValidMask) - return SDValue(); - } - - // See if this pair of shuffles can be safely folded according to either - // of the following rules: - // shuffle(shuffle(x, y), undef) -> x - // shuffle(shuffle(x, undef), undef) -> x - // shuffle(shuffle(x, y), undef) -> y - bool IsIdentityMask = true; - unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; - for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { - // Skip Undefs. - if (Mask[i] < 0) - continue; - - // The combined shuffle must map each index to itself. - IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; - } - - if (IsIdentityMask) { - if (CommuteOperands) - // optimize shuffle(shuffle(x, y), undef) -> y. - return OtherSV->getOperand(1); - - // optimize shuffle(shuffle(x, undef), undef) -> x - // optimize shuffle(shuffle(x, y), undef) -> x - return OtherSV->getOperand(0); - } - - // It may still be beneficial to combine the two shuffles if the - // resulting shuffle is legal. - if (TLI.isTypeLegal(VT)) { - if (!CommuteOperands) { - if (TLI.isShuffleMaskLegal(Mask, VT)) - // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, - &Mask[0]); - } else { - // Compute the commuted shuffle mask. 
- for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } - - if (TLI.isShuffleMaskLegal(Mask, VT)) - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1, - &Mask[0]); - } - } - } - // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) - if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF && + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { // The incoming shuffle must be of the same type as the result of the @@ -11183,13 +11895,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Try to fold according to rules: - // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -11197,14 +11909,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); - SDValue SV0 = OtherSV->getOperand(0); - SDValue SV1 = OtherSV->getOperand(1); - bool HasSameOp0 = N1 == SV0; - bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; - if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) - // Early exit. - return SDValue(); - + SDValue SV0, SV1; SmallVector<int, 4> Mask; // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. @@ -11216,14 +11921,49 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { continue; } + SDValue CurrentVec; if (Idx < (int)NumElts) { + // This shuffle index refers to the inner shuffle N0. Lookup the inner + // shuffle mask to identify which vector is actually referenced. Idx = OtherSV->getMaskElt(Idx); - if (IsSV1Undef && Idx >= (int) NumElts) - Idx = -1; // Propagate Undef. - } else - Idx = HasSameOp0 ? Idx - NumElts : Idx; + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) + : OtherSV->getOperand(1); + } else { + // This shuffle index references an element within N1. + CurrentVec = N1; + } + + // Simple case where 'CurrentVec' is UNDEF. + if (CurrentVec.getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Canonicalize the shuffle index. We don't know yet if CurrentVec + // will be the first or second operand of the combined shuffle. 
+ Idx = Idx % NumElts; + if (!SV0.getNode() || SV0 == CurrentVec) { + // Ok. CurrentVec is the left hand side. + // Update the mask accordingly. + SV0 = CurrentVec; + Mask.push_back(Idx); + continue; + } + + // Bail out if we cannot convert the shuffle pair into a single shuffle. + if (SV1.getNode() && SV1 != CurrentVec) + return SDValue(); - Mask.push_back(Idx); + // Ok. CurrentVec is the right hand side. + // Update the mask accordingly. + SV1 = CurrentVec; + Mask.push_back(Idx + NumElts); } // Check if all indices in Mask are Undef. In case, propagate Undef. @@ -11234,34 +11974,37 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (isUndefMask) return DAG.getUNDEF(VT); + if (!SV0.getNode()) + SV0 = DAG.getUNDEF(VT); + if (!SV1.getNode()) + SV1 = DAG.getUNDEF(VT); + // Avoid introducing shuffles with illegal mask. - if (TLI.isShuffleMaskLegal(Mask, VT)) { - if (IsSV1Undef) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); - } + if (!TLI.isShuffleMaskLegal(Mask, VT)) { + // Compute the commuted shuffle mask and test again. + for (unsigned i = 0; i != NumElts; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElts) + Mask[i] = idx + NumElts; + else + Mask[i] = idx - NumElts; + } - // Compute the commuted shuffle mask. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + if (!TLI.isShuffleMaskLegal(Mask, VT)) + return SDValue(); - if (TLI.isShuffleMaskLegal(Mask, VT)) { - if (IsSV1Undef) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]); - // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]); + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + std::swap(SV0, SV1); } + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); } return SDValue(); @@ -11322,9 +12065,11 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } - // Let's see if the target supports this vector_shuffle. + // Let's see if the target supports this vector_shuffle and make sure + // we're not running after operation legalization where it may have + // custom lowered the vector shuffles. EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. 
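The combined-mask computation above composes two shuffles. A standalone sketch for shuffle(shuffle(A, B, M0), C, M1) in a case where the surviving vectors are A (SV0) and C (SV1):

    #include <cassert>
    #include <vector>

    int main() {
      const int NumElts = 4;
      std::vector<int> M0 = {4, 1, 6, 3}; // inner lanes come from B,A,B,A
      std::vector<int> M1 = {1, 3, 5, 7}; // outer: two inner lanes, two C lanes
      std::vector<int> M2;
      for (int i = 0; i < NumElts; ++i) {
        int idx = M1[i];
        bool fromC = idx >= NumElts;      // lanes >= NumElts reference C (N1)
        if (!fromC)
          idx = M0[idx];                  // resolve through the inner shuffle
        // Here every resolved lane lands in A or C, so renumber against the
        // pair (SV0 = A, SV1 = C); a third distinct vector would bail out.
        M2.push_back(idx % NumElts + (fromC ? NumElts : 0));
      }
      assert((M2 == std::vector<int>{1, 3, 5, 7})); // shuffle(A, C, M2)
    }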
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 8facbc2..1df4a1d 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,12 +40,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -62,7 +63,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -728,6 +728,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; CallLoweringInfo CLI; + CLI.setIsPatchPoint(); if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) return false; @@ -1579,7 +1580,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 86b9542..7e72dc6 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -133,16 +133,17 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, ImmutableCallSite CS(I); if (isa<InlineAsm>(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(CS); + TLI->ParseConstraints(TRI, CS); for (size_t I = 0, E = Ops.size(); I != E; ++I) { TargetLowering::AsmOperandInfo &Op = Ops[I]; if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). TLI->ComputeConstraintToUse(Op, SDValue(), DAG); std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, - Op.ConstraintVT); + TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, + Op.ConstraintVT); if (PhysReg.first == SP) MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); } @@ -273,6 +274,7 @@ void FunctionLoweringInfo::clear() { ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); + StatepointStackSlots.clear(); PreferredExtendType.clear(); } @@ -470,60 +472,6 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, } } -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. 
-void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, - MachineBasicBlock *MBB) { - // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); - assert(CE->getOpcode() == Instruction::BitCast && - isa<Function>(CE->getOperand(0)) && - "Personality should be a function"); - MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0))); - - // Gather all the type infos for this landing pad and pass them along to - // MachineModuleInfo. - std::vector<const GlobalValue *> TyInfo; - unsigned N = I.getNumArgOperands(); - - for (unsigned i = N - 1; i > 1; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { - unsigned FilterLength = CI->getZExtValue(); - unsigned FirstCatch = i + FilterLength + !FilterLength; - assert(FirstCatch <= N && "Invalid filter length"); - - if (FirstCatch < N) { - TyInfo.reserve(N - FirstCatch); - for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - if (!FilterLength) { - // Cleanup. - MMI->addCleanup(MBB); - } else { - // Filter. - TyInfo.reserve(FilterLength - 1); - for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addFilterTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - N = i; - } - } - - if (N > 2) { - TyInfo.reserve(N - 2); - for (unsigned j = 2; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a65f33e..93699a7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); + Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5d17a5f..61c0a6f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -40,7 +40,7 @@ using namespace llvm; #define DEBUG_TYPE "legalizedag" //===----------------------------------------------------------------------===// -/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and +/// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. 
This involves /// eliminating value sizes the machine cannot handle (promoting small sizes to /// large sizes or splitting up large values into small values) as well as @@ -86,7 +86,7 @@ private: void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); - /// PerformInsertVectorEltInMemory - Some target cannot handle a variable + /// Some targets cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. @@ -95,7 +95,7 @@ private: SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl); - /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which + /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> @@ -200,7 +200,7 @@ public: }; } -/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which +/// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> @@ -232,7 +232,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { @@ -260,7 +260,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. - TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); @@ -286,7 +286,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { return Result; } -/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +/// Expands an unaligned store to 2 half-size stores. static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, SelectionDAGLegalize *DAGLegalize) { @@ -409,7 +409,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } -/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +/// Expands an unaligned load to 2 half-size loads. static void ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI, @@ -561,8 +561,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, ChainResult = TF; } -/// PerformInsertVectorEltInMemory - Some target cannot handle a variable -/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it +/// Some target cannot handle a variable insertion index for the +/// INSERT_VECTOR_ELT instruction. 
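ShuffleWithNarrowerEltType's documented example can be verified mechanically: one v4i32 lane covers two v8i16 lanes, so index i expands to {2*i, 2*i+1}. A sketch of just that mask expansion:

    #include <cassert>
    #include <vector>

    int main() {
      const int Scale = 2;                 // i32 -> two i16 lanes
      std::vector<int> mask   = {0, 1, 0, 1};
      std::vector<int> narrow;
      for (int idx : mask)
        for (int s = 0; s < Scale; ++s)
          narrow.push_back(idx < 0 ? -1 : idx * Scale + s); // keep undefs
      // Matches the comment: <0,1,0,1> -> <0,1,2,3,0,1,2,3>.
      assert((narrow == std::vector<int>{0, 1, 2, 3, 0, 1, 2, 3}));
    }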
In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue SelectionDAGLegalize:: @@ -725,14 +725,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); if (Align < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), - DAG, TLI, this); + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -766,8 +765,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, - AAInfo); + NVT, isVolatile, isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -845,7 +843,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { } case TargetLowering::Custom: { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) + if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); return; } @@ -946,7 +944,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // nice to have an effective generic way of getting these benefits... // Until such a way is found, don't insist on promoting i1 here. (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == + TargetLowering::Promote)) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. unsigned NewWidth = SrcVT.getStoreSizeInBits(); @@ -1058,7 +1057,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Ch; } else { bool isCustom = false; - switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { + switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), + SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; @@ -1080,36 +1080,35 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } } } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && - TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? 
- ISD::FP_EXTEND : ISD::ANY_EXTEND); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + // If the source type is not legal, see if there is a legal extload to + // an intermediate type that we can then extend further. + EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); + if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? + TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { + // If we are loading a legal type, this is a non-extload followed by a + // full extend. + ISD::LoadExtType MidExtType = + (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType; + + SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, + SrcVT, LD->getMemOperand()); + unsigned ExtendOp = + ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; } assert(!SrcVT.isVector() && @@ -1133,8 +1132,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); else - ValRes = DAG.getZeroExtendInReg(Result, dl, - SrcVT.getScalarType()); + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); Value = ValRes; Chain = Result.getValue(1); break; @@ -1155,8 +1153,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } -/// LegalizeOp - Return a legal replacement for the given operation, with -/// all legal operands. +/// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); @@ -1642,8 +1639,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Results.push_back(Tmp2); } -/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. +/// Legalize a SETCC with given LHS and RHS and condition code CC on the current +/// target. /// /// If the SETCC has been legalized using AND / OR, then the legalized node /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert @@ -1757,7 +1754,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, return false; } -/// EmitStackConvert - Emit a store/load combination to the stack. This stores +/// Emit a store/load combination to the stack. This stores /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. @@ -1917,7 +1914,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, return true; } -/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't +/// Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); @@ -2029,7 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } -// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// Expand a node into a call to a libcall. 
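The new Expand path above splits an illegal extload into a load of a legal (possibly intermediate) type followed by a separate extend. At source level, the sextload i16 -> i64 case looks like this (a sketch, assuming i16 loads are legal on the target):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static int64_t sextLoadI16(const void *p) {
      int16_t v;                        // NON_EXTLOAD of the legal type
      std::memcpy(&v, p, sizeof v);
      return static_cast<int64_t>(v);   // separate SIGN_EXTEND to the result
    }

    int main() {
      int16_t mem = -5;
      assert(sextLoadI16(&mem) == -5);
    }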
If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result // and leave the Hi part unset. @@ -2077,7 +2074,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// Generate a libcall taking the given operands as arguments /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, @@ -2108,7 +2105,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, return CallInfo.first; } -// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, @@ -2178,7 +2175,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +/// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; @@ -2194,8 +2191,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, return TLI.getLibcallName(LC) != nullptr; } -/// useDivRem - Only issue divrem libcall if both quotient and remainder are -/// needed. +/// Only issue divrem libcall if both quotient and remainder are needed. static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { // The other use might have been replaced with a divrem already. unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; @@ -2220,8 +2216,7 @@ static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { return false; } -/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem -/// pairs. +/// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -2283,7 +2278,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Results.push_back(Rem); } -/// isSinCosLibcallAvailable - Return true if sincos libcall is available. +/// Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { @@ -2297,8 +2292,8 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// canCombineSinCosLibcall - Return true if sincos libcall is available and -/// can be used to combine sin and cos. +/// Return true if sincos libcall is available and can be used to combine sin +/// and cos. static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, const TargetMachine &TM) { if (!isSinCosLibcallAvailable(Node, TLI)) @@ -2311,8 +2306,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, return true; } -/// useSinCos - Only issue sincos libcall if both sin and cos are -/// needed. +/// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? 
ISD::FCOS : ISD::FSIN; @@ -2330,8 +2324,7 @@ static bool useSinCos(SDNode *Node) { return false; } -/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos -/// pairs. +/// Issue libcalls to sincos to compute sin / cos pairs. void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -2396,7 +2389,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, MachinePointerInfo(), false, false, false, 0)); } -/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. @@ -2594,7 +2587,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } -/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// *INT_TO_FP operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP @@ -2636,7 +2629,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, dl, NewInTy, LegalOp)); } -/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// FP_TO_*INT operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT @@ -2680,8 +2673,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } -/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. -/// +/// Open code the operations for BSWAP of the specified operation. SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT); @@ -2727,8 +2719,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { } } -/// ExpandBitCount - Expand the specified bitcount instruction into operations. -/// +/// Expand the specified bitcount instruction into operations. SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl) { switch (Opc) { @@ -3528,6 +3519,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FMAD: + llvm_unreachable("Illegal fmad should never be formed"); + case ISD::FADD: Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, @@ -3554,6 +3548,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::FP_TO_FP16: { + if (!TM.Options.UseSoftFloat && TM.Options.UnsafeFPMath) { + SDValue Op = Node->getOperand(0); + MVT SVT = Op.getSimpleValueType(); + if ((SVT == MVT::f64 || SVT == MVT::f80) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { + // Under fastmath, we can expand this node into a fround followed by + // a float-half conversion. 
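ExpandBSWAP, whose comment is cleaned up above, open-codes byte swaps with shifts and masks when the target lacks a native BSWAP. The i32 pattern it produces is equivalent to:

    #include <cassert>
    #include <cstdint>

    static uint32_t bswap32(uint32_t x) {
      return (x << 24) |
             ((x << 8) & 0x00FF0000u) |   // byte 1 -> byte 2
             ((x >> 8) & 0x0000FF00u) |   // byte 2 -> byte 1
             (x >> 24);
    }

    int main() {
      assert(bswap32(0x11223344u) == 0x44332211u);
    }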
+ SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0)); + Results.push_back( + DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + break; + } + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); @@ -4319,8 +4328,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplaceNode(Node, Results.data()); } -// SelectionDAG::Legalize - This is the entry point for the file. -// +/// This is the entry point for the file. void SelectionDAG::Legalize() { AssignTopologicalOrder(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4591e79..b596715 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl).first; + &Op, 1, Signed, dl).first; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b73bb0a..5507c70 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -66,6 +66,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; @@ -454,6 +455,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); + + SDValue Mask = N->getMask(); + EVT NewMaskVT = getSetCCResultType(NVT); + if (NewMaskVT != N->getMask().getValueType()) + Mask = PromoteTargetBoolean(Mask, NewMaskVT); + SDLoc dl(N); + + SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), + Mask, ExtSrc0, N->getMemoryVT(), + N->getMemOperand(), ISD::SEXTLOAD); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. 
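The masked-load promotion above relies on the ISD::MLOAD/MSTORE semantics that this patch threads through the type legalizer: inactive lanes of a masked load produce the pass-through value (Src0), and inactive lanes of a masked store leave memory untouched. A minimal scalar model of both, with hypothetical names:

inline void maskedLoadModel(const int *Mem, const bool *Mask, const int *Src0,
                            int *Res, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    Res[i] = Mask[i] ? Mem[i] : Src0[i]; // inactive lanes take the pass-through
}

inline void maskedStoreModel(int *Mem, const bool *Mask, const int *Val,
                             unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    if (Mask[i])
      Mem[i] = Val[i]; // inactive lanes never touch memory
}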
@@ -825,6 +844,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), + OpNo); break; + case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -1091,6 +1114,64 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ + + assert(OpNo == 2 && "Only know how to promote the mask!"); + SDValue DataOp = N->getValue(); + EVT DataVT = DataOp.getValueType(); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDLoc dl(N); + + bool TruncateStore = false; + if (!TLI.isTypeLegal(DataVT)) { + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; + } + else { + assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && + "Unexpected data legalization in MSTORE"); + DataOp = GetWidenedVector(DataOp); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(DataOp.getValueType()); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + } + } + else + Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, + N->getMemoryVT(), N->getMemOperand(), + TruncateStore); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getValueType(0); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -2936,17 +3017,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - NewMask.push_back(SV->getMaskElt(i)); - } + ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements()); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); } diff --git 
a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index bd7dacf..ebf6b28 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -921,6 +921,17 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // The target didn't want to custom lower it after all. return false; + // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to + // provide the same kind of custom splitting behavior. + if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { + // We've legalized a return type by splitting it. If there is a chain, + // replace that too. + SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); + if (N->getNumValues() > 1) + ReplaceValueWith(SDValue(N, 1), Results[2]); + return true; + } + // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 30f412b..cef3fc9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -240,6 +240,7 @@ private: SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_SDIV(SDNode *N); @@ -285,6 +286,8 @@ private: SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -578,6 +581,7 @@ private: void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -594,6 +598,7 @@ private: SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); @@ -627,6 +632,7 @@ private: SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); @@ -653,6 +659,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git 
a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index b5af7b7..03c2734 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -200,12 +200,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) - switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getMemoryVT())) { + switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), + LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + if (Lowered == Result) + return TranslateLegalizeResults(Op, Lowered); Changed = true; if (Lowered->getNumValues() != Op->getNumValues()) { // This expanded to something other than the load. Assume the @@ -231,9 +234,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - Changed = true; - return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); + case TargetLowering::Custom: { + SDValue Lowered = TLI.LowerOperation(Result, DAG); + Changed = Lowered != Result; + return TranslateLegalizeResults(Op, Lowered); + } case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); @@ -389,7 +394,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { if (Op.getOperand(j) .getValueType() .getVectorElementType() - .isFloatingPoint()) + .isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); else Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); @@ -398,8 +404,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) { } Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); - if (VT.isFloatingPoint() || - (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); else return DAG.getNode(ISD::BITCAST, dl, VT, Op); @@ -509,7 +516,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), LD->getAAInfo()); } else { EVT LoadVT = WideVT; @@ -521,7 +529,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); } RemainingBytes -= LoadBytes; @@ -553,9 +562,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { BitOffset += SrcEltBits; if (BitOffset >= WideBits) { WideIdx++; - Offset -= WideBits; - if (Offset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - Offset, + BitOffset -= WideBits; + if (BitOffset > 0) { + ShAmt = DAG.getConstant(SrcEltBits - 
BitOffset, TLI.getShiftAmountTy(WideVT)); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); @@ -592,7 +601,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment(), LD->getAAInfo()); + MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); @@ -651,7 +660,8 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, AAInfo); + isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), + AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 27f63d2..63671f7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -597,6 +597,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MLOAD: + SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); + break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; @@ -979,6 +982,67 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MLD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Ch = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + SDValue Mask = MLD->getMask(); + unsigned Alignment = MLD->getOriginalAlignment(); + ISD::LoadExtType ExtType = MLD->getExtensionType(); + + // If Alignment is equal to the vector size, + // use half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MLD->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ExtType); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ExtType); + + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MLD, 1), Ch); + +} + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -1234,6 +1298,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); + break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; @@ -1395,6 +1462,58 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { MachinePointerInfo(), EltVT, false, false, false, 0); } +SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + // If Alignment is equal to the vector size, + // use half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + N->isTruncatingStore()); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + N->isTruncatingStore()); + + + // Build a factor node to remember that this store is independent of the + // other one.
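// The SecondHalfAlignment computation used by both masked splitters above
// reduces to this rule, shown as a hypothetical standalone helper: the high
// half starts VecBytes/2 into the object, so a whole-vector alignment only
// guarantees half of itself for the second access; any smaller alignment
// carries over unchanged.
inline unsigned secondHalfAlign(unsigned Align, unsigned VecBytes) {
  return Align == VecBytes ? Align / 2 : Align;
}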
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + +} + SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed store of vector?"); assert(OpNo == 1 && "Can only split the stored value"); @@ -1599,6 +1718,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::MLOAD: + Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; case ISD::ADD: case ISD::AND: @@ -2289,6 +2411,44 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { return Result; } +SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue Src0 = GetWidenedVector(N->getSrc0()); + ISD::LoadExtType ExtType = N->getExtensionType(); + SDLoc dl(N); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(WidenVT); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + + SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), + Mask, Src0, N->getMemoryVT(), + N->getMemOperand(), ExtType); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), @@ -2434,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::ANY_EXTEND: @@ -2632,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } +SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue StVal = MST->getValue(); + // Widen the value + SDValue WideVal = GetWidenedVector(StVal); + SDLoc dl(N); + + if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + // The mask should be widened as well + EVT BoolVT = getSetCCResultType(WideVal.getValueType()); + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + assert(Mask.getValueType().getVectorNumElements() == + WideVal.getValueType().getVectorNumElements() && + "Mask and data vectors should have the same number of elements"); + return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), + Mask, MST->getMemoryVT(), MST->getMemOperand(), + false); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 8b9f618..3853ada 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -137,13 +137,9 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } // Helper for AddGlue to clone node operands. -static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, - SmallVectorImpl<EVT> &VTs, +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, SDValue ExtraOper = SDValue()) { - SmallVector<SDValue, 8> Ops; - for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) - Ops.push_back(N->getOperand(I)); - + SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); if (ExtraOper.getNode()) Ops.push_back(ExtraOper); @@ -165,7 +161,6 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, } static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { - SmallVector<EVT, 4> VTs; SDNode *GlueDestNode = Glue.getNode(); // Don't add glue from a node to itself. 
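PromoteIntOp_MSTORE, WidenVecRes_MLOAD and WidenVecOp_MSTORE above all pad the widened mask with zeroes via CONCAT_VECTORS instead of using ModifyToType(), so the extra lanes can never enable a spurious memory access. A scalar sketch of that padding step, with hypothetical names:

#include <cstddef>
#include <vector>

inline std::vector<bool> widenMaskWithZeros(const std::vector<bool> &Mask,
                                            std::size_t WideN) {
  std::vector<bool> Wide(WideN, false); // the added tail lanes stay "off"
  for (std::size_t i = 0; i != Mask.size(); ++i)
    Wide[i] = Mask[i];
  return Wide;
}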
@@ -179,9 +174,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { // Don't add glue to something that already has a glue value. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; - for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) - VTs.push_back(N->getValueType(I)); - + SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); if (AddGlue) VTs.push_back(MVT::Glue); @@ -197,11 +190,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { !N->hasAnyUseOfValue(N->getNumValues() - 1)) && "expected an unused glue value"); - SmallVector<EVT, 4> VTs; - for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I) - VTs.push_back(N->getValueType(I)); - - CloneNodeWithValues(N, DAG, VTs); + CloneNodeWithValues(N, DAG, + makeArrayRef(N->value_begin(), N->getNumValues() - 1)); } /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. @@ -551,6 +541,14 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { NodeNumDefs = 0; return; } + if (POpc == TargetOpcode::PATCHPOINT && + Node->getValueType(0) == MVT::Other) { + // PATCHPOINT is defined to have one result, but it might really have none + // if we're not using CallingConv::AnyReg. Don't mistake the chain for a + // real definition. + NodeNumDefs = 0; + return; + } unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); // Some instructions define regs that are not represented in the selection DAG // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7961e66..9466f4d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -234,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } -ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { +ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: - return ISD::ANY_EXTEND; + return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND; case ISD::SEXTLOAD: return ISD::SIGN_EXTEND; case ISD::ZEXTLOAD: @@ -1484,6 +1484,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; + + for (int i = 0; i < (int)NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + continue; + + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } + + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1513,9 +1541,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return getUNDEF(VT); // If Identity shuffle return that node. 
- bool Identity = true; + bool Identity = true, AllSame = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] != MaskVec[0]) AllSame = false; } if (Identity && NElts) return N1; @@ -1537,18 +1566,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (Splat && Splat.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + bool SameNumElts = + V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); + // We only have a splat which can skip shuffles if there is a splatted // value and no undef lanes rearranged by the shuffle. if (Splat && UndefElements.none()) { // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the // number of elements match or the value splatted is a zero constant. - if (V.getValueType().getVectorNumElements() == - VT.getVectorNumElements()) + if (SameNumElts) return N1; if (auto *C = dyn_cast<ConstantSDNode>(Splat)) if (C->isNullValue()) return N1; } + + // If the shuffle itself creates a splat, build the vector directly. + if (AllSame && SameNumElts) { + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SmallVector<SDValue, 8> Ops(NElts, Splatted); + + EVT BuildVT = BV->getValueType(0); + SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (BuildVT != VT) + NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); + return NewBV; + } } } @@ -2323,6 +2369,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } + case ISD::EXTRACT_ELEMENT: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + const unsigned Index = + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const unsigned BitWidth = Op.getValueType().getSizeInBits(); + + // Remove low part of known bits mask + KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); + KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + + // Remove high part of known bits mask + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { @@ -2522,6 +2583,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. break; + case ISD::EXTRACT_ELEMENT: { + const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); + const int BitWidth = Op.getValueType().getSizeInBits(); + const int Items = + Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + + // Get reverse index (starting from 1), Op1 value indexes elements from + // little end. Sign starts at big end. + const int rIndex = Items - 1 - + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // If the sign portion ends in our element the subtraction gives the correct + // result. Otherwise it gives either a negative or a > bitwidth result + return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + } } // If we are looking at the loaded value of the SDNode.
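The EXTRACT_ELEMENT case above is easiest to sanity-check in scalar form: sign bits are counted from the big end of the wide value, while operand 1 indexes elements from the little end. A standalone model of the clamp, with hypothetical names:

#include <algorithm>

inline int signBitsOfElement(int KnownSign, int BitWidth, int Items,
                             int Index) {
  int rIndex = Items - 1 - Index; // distance from the big end, in elements
  return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}

For an i64 with 40 known sign bits, the high i32 (Index 1) yields 32 and the low i32 (Index 0) yields 8, matching the subtraction in the patch.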
@@ -2683,6 +2759,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(apf, VT); } case ISD::BITCAST: + if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2756,7 +2834,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstant(api, VT); } case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), VT); @@ -3379,8 +3459,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Perform trivial constant folding. - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); - if (SV.getNode()) return SV; + if (SDValue SV = + FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) + return SV; // Canonicalize constant to RHS if commutative. if (N1C && !N2C && isCommutativeBinOp(Opcode)) { @@ -3564,7 +3645,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const APFloat &V3 = N3CFP->getValueAPF(); APFloat::opStatus s = V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) + if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) return getConstantFP(V1, VT); } break; @@ -3913,9 +3994,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = - MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4028,8 +4107,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4123,8 +4201,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getAttributes(). 
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4214,11 +4291,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemcpy( + *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // If we really need inline code and the target declined to provide it, // use a (potentially long) sequence of loads and stores. @@ -4280,10 +4359,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4332,10 +4413,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, - Size, Align, isVol, DstPtrInfo); - if (Result.getNode()) - return Result; + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemset( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + if (Result.getNode()) + return Result; + } // Emit a library call. 
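// The three memcpy/memmove/memset changes above share one shape, sketched
// here with hypothetical names: consult the optional target hook only when
// it is present, and fall back to the generic path (such as the library
// call emitted below) when it is absent or declines.
struct TargetHook {
  virtual long emit() const = 0; // returns 0 when the target declines
  virtual ~TargetHook() {}
};
inline long lowerWithFallback(const TargetHook *TSI, long Fallback) {
  if (TSI)
    if (long R = TSI->emit())
      return R; // target-specific lowering succeeded
  return Fallback;
}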
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); @@ -4680,10 +4763,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); assert(VT.isVector() == MemVT.isVector() && - "Cannot use trunc store to convert to or from a vector!"); + "Cannot use an ext load to convert to or from a vector!"); assert((!VT.isVector() || VT.getVectorNumElements() == MemVT.getVectorNumElements()) && - "Cannot use trunc store to change the number of vector elements!"); + "Cannot use an ext load to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -4917,6 +5000,61 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, return SDValue(N, 0); } +SDValue +SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, + MachineMemOperand *MMO, ISD::LoadExtType ExtTy) { + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, VTs, + ExtTy, MemVT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, bool isTrunc) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Val }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, + VTs, isTrunc, MemVT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, @@ -6495,11 +6633,25 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - // Handle X+C - if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && - cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) - return true; - + // Handle X + C. + if (isBaseWithConstantOffset(Loc)) { + int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc) { + // If the base location is a simple address with no offset itself, then + // the second load's first add operand should be the base address. 
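// Both branches below reduce to a single comparison, sketched here as a
// hypothetical helper: loads at Base+BOff and Base+LOff sit Dist elements
// of size Bytes apart exactly when their offsets differ by Dist*Bytes, with
// BOff == 0 when the base location carries no offset of its own.
inline bool offsetsAreConsecutive(long long BOff, long long LOff,
                                  long long Dist, long long Bytes) {
  return LOff - BOff == Dist * Bytes;
}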
+ if (LocOffset == Dist * (int)Bytes) + return true; + } else if (isBaseWithConstantOffset(BaseLoc)) { + // The base location itself has an offset, so subtract that value from the + // second load's offset before comparing to distance * size. + int64_t BOffset = + cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { + if ((LocOffset - BOffset) == Dist * (int)Bytes) + return true; + } + } + } const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8f582f1..097b618 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -16,9 +16,11 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" @@ -46,6 +48,8 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -54,7 +58,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSelectionDAGInfo.h" @@ -564,6 +567,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } else if (NumParts > 0) { // If the intermediate type was expanded, split the value into // legal parts.
+ assert(NumIntermediates != 0 && "division by zero"); assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; @@ -865,7 +869,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getSubtarget().getDataLayout(); + DL = DAG.getTarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -884,6 +888,7 @@ void SelectionDAGBuilder::clear() { CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; + StatepointLowering.clear(); } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -1234,24 +1239,29 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + const Function *F = I.getParent()->getParent(); + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + LLVMContext &Context = F->getContext(); + bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg); - const Function *F = I.getParent()->getParent(); - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; + for (unsigned j = 0; j != NumValues; ++j) { + EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); + VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(Context, VT); + MVT PartVT = TLI.getRegisterType(Context, VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1259,8 +1269,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::InReg)) + if (RetInReg) Flags.setInReg(); // Propagate extension type if any @@ -1405,7 +1414,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } else { - Condition = ISD::SETEQ; // silence warning. + (void)Condition; // silence warning. 
llvm_unreachable("Unknown compare instruction"); } @@ -1947,7 +1956,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), Reg, VT); SDValue Cmp; - unsigned PopCount = CountPopulation_64(B.Mask); + unsigned PopCount = countPopulation(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it @@ -1959,7 +1968,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // There is only one zero bit in the range, test for it directly. Cmp = DAG.getSetCC( getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); + DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, @@ -2062,10 +2071,14 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); + } Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), @@ -2077,6 +2090,27 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } +unsigned +SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadBB) { + SDValue Chain = getControlRoot(); + + // Get the typeid that we will dispatch on later. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); + SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); + + // Branch to the main landing pad block. + MachineBasicBlock *ClauseMBB = FuncInfo.MBB; + ClauseMBB->addSuccessor(LPadBB); + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, + DAG.getBasicBlock(LPadBB))); + return VReg; +} + /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, @@ -2363,17 +2397,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, MachineBasicBlock* SwitchBB) { - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; - - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; - Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); // Size is the number of Cases represented by this range. 
unsigned Size = CR.Range.second - CR.Range.first; @@ -2395,6 +2420,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, DEBUG(dbgs() << "Selecting best pivot: \n" << "First: " << First << ", Last: " << Last <<'\n' << "LSize: " << LSize << ", RSize: " << RSize << '\n'); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; J!=E; ++I, ++J) { const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); @@ -2404,13 +2430,17 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, "Invalid case distance"); // Use volatile double here to avoid excess precision issues on some hosts, // e.g. that use 80-bit X87 registers. + // Only consider the density of sub-ranges that actually have sufficient + // entries to be lowered as a jump table. volatile double LDensity = - (double)LSize.roundToDouble() / - (LEnd - First + 1ULL).roundToDouble(); + LSize.ult(TLI.getMinimumJumpTableEntries()) + ? 0.0 + : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); volatile double RDensity = - (double)RSize.roundToDouble() / - (Last - RBegin + 1ULL).roundToDouble(); - volatile double Metric = Range.logBase2()*(LDensity+RDensity); + RSize.ult(TLI.getMinimumJumpTableEntries()) + ? 0.0 + : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); + volatile double Metric = Range.logBase2() * (LDensity + RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -2427,13 +2457,25 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, RSize -= J->size(); } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (areJTsAllowed(TLI)) { - // If our case is dense we *really* should handle it earlier! - assert((FMetric > 0) && "Should handle dense range earlier!"); - } else { + if (FMetric == 0 || !areJTsAllowed(TLI)) Pivot = CR.Range.first + Size/2; - } + splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB); + return true; +} + +void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, + CaseRecVector &WorkList, + const Value *SV, + MachineBasicBlock *SwitchBB) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = FuncInfo.MF; + + // Figure out which block is immediately after the current one. + MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; + + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); @@ -2446,10 +2488,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // LHS's Case Value, and that Case Value is exactly one less than the // Pivot's Value, then we can branch directly to the LHS's Target, // rather than creating a leaf node for it. - if ((LHSR.second - LHSR.first) == 1 && - LHSR.first->High == CR.GE && + if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && cast<ConstantInt>(C)->getValue() == - (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { TrueBB = LHSR.first->BB; } else { TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); @@ -2466,12 +2507,12 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // the current Case Value, rather than emitting a RHS leaf node for it. 
if ((RHSR.second - RHSR.first) == 1 && CR.LT && cast<ConstantInt>(RHSR.first->Low)->getValue() == - (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { FalseBB = RHSR.first->BB; } else { FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, FalseBB); - WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR)); // Put SV in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(SV); @@ -2486,8 +2527,6 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, visitSwitchCase(CB, SwitchBB); else SwitchCases.push_back(CB); - - return true; } /// handleBitTestsSwitchCase - if current case range has few destinations and @@ -2514,15 +2553,14 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, return false; size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { // Single case counts one, case range - two. numCmps += (I->Low == I->High ? 1 : 2); } // Count unique destinations SmallSet<MachineBasicBlock*, 4> Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { Dests.insert(I->BB); if (Dests.size() > 3) // Don't bother the code below if there are too many unique destinations @@ -2629,9 +2667,8 @@ void SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases - for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); - i != e; ++i) { + // Start with "simple" cases. + for (SwitchInst::ConstCaseIt i : SI.cases()) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; @@ -2694,32 +2731,58 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = nullptr; + if (SwitchMBB + 1 != FuncInfo.MF->end()) + NextBlock = SwitchMBB + 1; + + + // Create a vector of Cases, sorted so that we can efficiently create a binary + // search tree from them. + CaseVector Cases; + Clusterify(Cases, SI); + + // Get the default destination MBB. MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - // If there is only the default destination, branch to it if it is not the - // next basic block. Otherwise, just fall through. - if (!SI.getNumCases()) { - // Update machine-CFG edges. + if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && + !Cases.empty()) { + // Replace an unreachable default destination with the most popular case + // destination. + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } - // If this is not a fall-through branch, emit the branch. + // Set new default. + assert(MaxPop > 0); + assert(MaxBB); + Default = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the default.
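// A scalar restatement of the popularity scan above, as a hypothetical
// helper: the replacement default is simply the successor block that the
// most cases jump to.
#include <unordered_map>
#include <vector>
template <typename BlockT>
BlockT *mostPopularSuccessor(const std::vector<BlockT *> &Succs) {
  std::unordered_map<BlockT *, unsigned> Pop;
  BlockT *Best = nullptr;
  unsigned MaxPop = 0;
  for (BlockT *S : Succs)
    if (++Pop[S] > MaxPop) {
      MaxPop = Pop[S];
      Best = S;
    }
  return Best; // null only when there are no cases
}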
+ Cases.erase(std::remove_if(Cases.begin(), Cases.end(), + [&](const Case &C) { return C.BB == Default; }), + Cases.end()); + } + + // If there is only the default destination, go there directly. + if (Cases.empty()) { + // Update machine-CFG edges. SwitchMBB->addSuccessor(Default); - if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default))); + // If this is not a fall-through branch, emit the branch. + if (Default != NextBlock) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Default))); + } return; } - // If there are any non-default case statements, create a vector of Cases - // representing each one, and sort the vector so that we can efficiently - // create a binary search tree from them. - CaseVector Cases; - Clusterify(Cases, SI); - - // Get the Value to be switched on and default basic blocks, which will be - // inserted into CaseBlock records, representing basic blocks in the binary - // search tree. + // Get the Value to be switched on. const Value *SV = SI.getCondition(); // Push the initial CaseRec onto the worklist @@ -3613,6 +3676,74 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Value *PtrOperand = I.getArgOperand(1); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, + MMO, false); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory( + AliasAnalysis::Location(PtrOperand, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); + } + + MachineMemOperand *MMO = + DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, + ISD::NON_EXTLOAD); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -4460,11 +4591,10 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || + if (!F->hasFnAttribute(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. - CountPopulation_32(Val)+Log2_32(Val) < 7) { + countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has @@ -4623,7 +4753,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); - SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); EVT VT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); return nullptr; @@ -4632,7 +4763,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); SDValue Chain = getValue(RegValue).getOperand(0); - SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); return nullptr; @@ -4642,6 +4774,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4662,6 +4795,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4679,6 +4813,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { + // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. 
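// (The same address-space assertion, with the same FIXME, appears in the
// memcpy and memset cases above; all three presumably want a target hook
// once that FIXME is addressed.)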
assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() @@ -4914,6 +5049,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_load: + visitMaskedLoad(I); + return nullptr; + case Intrinsic::masked_store: + visitMaskedStore(I); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -5459,6 +5600,78 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { visitPatchpoint(&I); return nullptr; } + case Intrinsic::experimental_gc_statepoint: { + visitStatepoint(I); + return nullptr; + } + case Intrinsic::experimental_gc_result_int: + case Intrinsic::experimental_gc_result_float: + case Intrinsic::experimental_gc_result_ptr: + case Intrinsic::experimental_gc_result: { + visitGCResult(I); + return nullptr; + } + case Intrinsic::experimental_gc_relocate: { + visitGCRelocate(I); + return nullptr; + } + case Intrinsic::instrprof_increment: + llvm_unreachable("instrprof failed to lower an increment"); + + case Intrinsic::frameallocate: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Do the allocation and map it as a normal value. + // FIXME: Maybe we should add this to the alloca map so that we don't have + // to register allocate it? + uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); + int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); + MVT PtrVT = TLI.getPointerTy(0); + SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); + setValue(&I, FIVal); + + // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is + // the same on all targets. + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(Alloc); + + return nullptr; + } + + case Intrinsic::framerecover: { + // i8* @llvm.framerecover(i8* %fn, i8* %fp) + MachineFunction &MF = DAG.getMachineFunction(); + MVT PtrVT = TLI.getPointerTy(0); + + // Get the symbol that defines the frame offset. + Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + + // Create a TargetExternalSymbol for the label to avoid any target lowering + // that would make this PC relative. + StringRef Name = FrameAllocSym->getName(); + assert(Name.size() == strlen(Name.data()) && "not null terminated"); + SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); + SDValue OffsetVal = + DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + + // Add the offset to the FP. 
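// (The offset is not a compile-time constant here: it becomes known only
// once the parent function's frame has been laid out, so it is modeled as a
// symbol the MC layer resolves and is added to the incoming frame pointer
// below.)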
+ Value *FP = I.getArgOperand(1); + SDValue FPVal = getValue(FP); + SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + setValue(&I, Add); + + return nullptr; + } + case Intrinsic::eh_begincatch: + case Intrinsic::eh_endcatch: + llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); } } @@ -5491,9 +5704,8 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, CLI.setChain(getRoot()); } - - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); - std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); @@ -6191,9 +6403,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, // If this is a constraint for a single physreg, or a constraint for a // register class, find it. - std::pair<unsigned, const TargetRegisterClass*> PhysReg = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), + OpInfo.ConstraintCode, + OpInfo.ConstraintVT); unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { @@ -6289,8 +6502,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -6382,12 +6595,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -6848,7 +7062,8 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, bool UseVoidTy, - MachineBasicBlock *LandingPad) { + MachineBasicBlock *LandingPad, + bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6871,7 +7086,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(CS->use_empty()); + .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); return lowerInvokable(CLI, LandingPad); } @@ -7003,7 +7218,7 @@ void 
SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, - LandingPad); + LandingPad, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -7051,8 +7266,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; - for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) - Ops.push_back(*i); + Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); @@ -7251,11 +7465,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) { + if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - if (Value == NumValues - 1) - Flags.setInConsecutiveRegsLast(); - } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7304,6 +7515,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } + + if (NeedsRegBlock && Value == NumValues - 1) + CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); } } @@ -7460,7 +7674,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, + ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } @@ -7518,11 +7733,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); - if (NeedsRegBlock) { + if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - if (Value == NumValues - 1) - Flags.setInConsecutiveRegsLast(); - } Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7537,6 +7749,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } + if (NeedsRegBlock && Value == NumValues - 1) + Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); PartBase += VT.getStoreSize(); } } @@ -7671,7 +7885,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. - // FIXME: this should insert code into the DAG! EmitFunctionEntryCode(); } @@ -7762,6 +7975,7 @@ MachineBasicBlock * SelectionDAGBuilder::StackProtectorDescriptor:: AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, + bool IsLikely, MachineBasicBlock *SuccMBB) { // If SuccBB has not been created yet, create it. if (!SuccMBB) { @@ -7771,6 +7985,7 @@ AddSuccessorMBB(const BasicBlock *BB, MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. 
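// The guard check nearly always passes at run time, so the success block is
// added with a likely weight (IsLikely == true) and the failure path, which
// diverts to the stack-smashing handler, with an unlikely one.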
-  ParentMBB->addSuccessor(SuccMBB);
+  ParentMBB->addSuccessor(
+      SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
   return SuccMBB;
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f74e652..ad7411f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,11 +14,13 @@
 #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
 #define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
 
+#include "StatepointLowering.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Statepoint.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLowering.h"
@@ -115,6 +117,10 @@ public:
   /// get simple disambiguation between loads without worrying about alias
   /// analysis.
   SmallVector<SDValue, 8> PendingLoads;
+
+  /// State used while lowering a statepoint sequence (gc_statepoint,
+  /// gc_relocate, and gc_result). See StatepointLowering.h/.cpp for details.
+  StatepointLoweringState StatepointLowering;
 private:
   /// PendingExports - CopyToReg nodes that copy values to virtual registers
@@ -417,8 +423,8 @@ private:
     assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
            "already initialized!");
     ParentMBB = MBB;
-    SuccessMBB = AddSuccessorMBB(BB, MBB);
-    FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);
+    SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+    FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
     if (!Guard)
       Guard = StackProtCheckCall.getArgOperand(0);
   }
@@ -487,9 +493,10 @@ private:
   /// Add a successor machine basic block to ParentMBB. If the successor mbb
   /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
-  /// block will be created.
+  /// block will be created. Assign a large weight if IsLikely is true.
   MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
                                      MachineBasicBlock *ParentMBB,
+                                     bool IsLikely,
                                      MachineBasicBlock *SuccMBB = nullptr);
 };
 
@@ -612,6 +619,13 @@ public:
     N = NewN;
   }
 
+  void removeValue(const Value *V) {
+    // This exists to support the hack in lowerCallFromStatepoint; it should
+    // be removed once that hack is resolved.
+    if (NodeMap.count(V))
+      NodeMap.erase(V);
+  }
+
   void setUnusedArgValue(const Value *V, SDValue NewN) {
     SDValue &N = UnusedArgNodeMap[V];
     assert(!N.getNode() && "Already set a value for this node!");
@@ -640,12 +654,15 @@ public:
                     unsigned NumArgs,
                     SDValue Callee,
                     bool UseVoidTy = false,
-                    MachineBasicBlock *LandingPad = nullptr);
+                    MachineBasicBlock *LandingPad = nullptr,
+                    bool IsPatchPoint = false);
 
   /// UpdateSplitBlock - When an MBB was split during scheduling, update the
   /// references that need to refer to the last resulting block.
   void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
 
+  // This function is responsible for the whole statepoint lowering process.
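// (Public, unlike the visit* methods, so statepoint lowering can be reached
// from outside the instruction visitor; the implementation lives in
// StatepointLowering.cpp.)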
+ void LowerStatepoint(ImmutableStatepoint Statepoint); private: std::pair<SDValue, SDValue> lowerInvokable( TargetLowering::CallLoweringInfo &CLI, @@ -673,6 +690,8 @@ private: CaseRecVector& WorkList, const Value* SV, MachineBasicBlock *SwitchBB); + void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList, + const Value *SV, MachineBasicBlock *SwitchBB); bool handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, @@ -699,6 +718,8 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); + unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadMBB); private: // These all get lowered before this pass. @@ -756,6 +777,8 @@ private: void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); + void visitMaskedLoad(const CallInst &I); + void visitMaskedStore(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); void visitAtomicRMW(const AtomicRMWInst &I); void visitFence(const FenceInst &I); @@ -784,6 +807,11 @@ private: void visitPatchpoint(ImmutableCallSite CS, MachineBasicBlock *LandingPad = nullptr); + // These three are implemented in StatepointLowering.cpp + void visitStatepoint(const CallInst &I); + void visitGCRelocate(const CallInst &I); + void visitGCResult(const CallInst &I); + void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c9f6cff..17eff94 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -187,6 +187,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; case ISD::FMA: return "fma"; + case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; @@ -269,6 +270,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; + case ISD::MLOAD: return "masked_load"; + case ISD::MSTORE: return "masked_store"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 79109b7..5e867cf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/GCStrategy.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/PostOrderIterator.h" @@ -19,10 +19,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -31,6 +32,7 @@ #include 
"llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -40,6 +42,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -47,7 +50,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -181,6 +183,10 @@ UseMBPI("use-mbpi", cl::init(true), cl::Hidden); #ifndef NDEBUG +static cl::opt<std::string> +FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, + cl::desc("Only display the basic block whose name " + "matches this for all view-*-dags options")); static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the first " @@ -345,7 +351,8 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -359,7 +366,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); - AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -372,7 +379,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { +static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast<PHINode>(BB->begin()); @@ -396,8 +403,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { continue; // Okay, we have to split this edge. - SplitCriticalEdge(Pred->getTerminator(), - GetSuccessorNumber(Pred, BB), SDISel, true); + SplitCriticalEdge( + Pred->getTerminator(), GetSuccessorNumber(Pred, BB), + CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -429,12 +437,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); - LibInfo = &getAnalysis<TargetLibraryInfo>(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), AA); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); @@ -650,6 +658,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { std::string BlockName; int BlockNumber = -1; (void)BlockNumber; + bool MatchFilterBB = false; (void)MatchFilterBB; +#ifndef NDEBUG + MatchFilterBB = (FilterDAGBasicBlockName.empty() || + FilterDAGBasicBlockName == + FuncInfo->MBB->getBasicBlock()->getName().str()); +#endif #ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || @@ -663,7 +677,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + if (ViewDAGCombine1 && MatchFilterBB) + CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. { @@ -676,8 +691,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Second step, hack on the DAG until it only uses operations and types that // the target supports. - if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + - BlockName); + if (ViewLegalizeTypesDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize-types input for " + BlockName); bool Changed; { @@ -691,7 +706,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->NewNodesMustHaveLegalTypes = true; if (Changed) { - if (ViewDAGCombineLT) + if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. @@ -717,7 +732,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - if (ViewDAGCombineLT) + if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. @@ -731,7 +746,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + if (ViewLegalizeDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize input for " + BlockName); { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); @@ -741,7 +757,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + if (ViewDAGCombine2 && MatchFilterBB) + CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. { @@ -755,7 +772,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + if (ViewISelDAGs && MatchFilterBB) + CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. 
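The new filter only matters in asserts-enabled builds, since the view-*-dags options are compiled out under NDEBUG. A typical invocation (the driver and block name here are illustrative) restricts the viewer pop-ups to a single block:

  llc -view-isel-dags -filter-view-dags=for.body test.ll

The comparison is against the IR basic block's name, so unnamed blocks cannot be singled out this way.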
@@ -767,7 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + if (ViewSchedDAGs && MatchFilterBB) + CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); @@ -777,7 +796,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Scheduler->Run(CurDAG, FuncInfo->MBB); } - if (ViewSUnitDAGs) Scheduler->viewGraph(); + if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. @@ -892,6 +911,8 @@ void SelectionDAGISel::DoInstructionSelection() { void SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -903,8 +924,73 @@ void SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + // If this is an MSVC-style personality function, we need to split the landing + // pad into several BBs. + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); + MF->getMMI().addPersonality( + MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); + if (MF->getMMI().getPersonalityType() == EHPersonality::MSVC_Win64SEH) { + // Make virtual registers and a series of labels that fill in values for the + // clauses. + auto &RI = MF->getRegInfo(); + FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + + // Get all invoke BBs that will unwind into the clause BBs. + SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), + MBB->pred_end()); + + // Emit separate machine basic blocks with separate labels for each clause + // before the main landing pad block. + MachineInstrBuilder SelectorPHI = BuildMI( + *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), + FuncInfo->ExceptionSelectorVirtReg); + for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { + // Skip filter clauses, we can't implement them yet. + if (LPadInst->isFilter(I)) + continue; + + MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); + MF->insert(MBB, ClauseBB); + + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); + + GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); + + // Start the BB with a label. + MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); + BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) + .addSym(ClauseLabel); + + // Construct a simple BB that defines a register with the typeid constant. + FuncInfo->MBB = ClauseBB; + FuncInfo->InsertPt = ClauseBB->end(); + unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Add the typeid virtual register to the phi in the main landing pad. + SelectorPHI.addReg(VReg).addMBB(ClauseBB); + } + + // Remove the edge from the invoke to the lpad. 
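// The resulting shape for a hypothetical landing pad with two catch clauses:
//
//   invoke BBs ---> ClauseBB0 (label, typeid vreg) ---\
//             \---> ClauseBB1 (label, typeid vreg) ----+--> lpad MBB
//                                                           (PHI of typeids)
//
// which is why the direct invoke->lpad edges are removed just below.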
+  for (MachineBasicBlock *InvokeBB : InvokeBBs)
+    InvokeBB->removeSuccessor(MBB);
+
+  // Restore FuncInfo back to its previous state and select the main landing
+  // pad block.
+  FuncInfo->MBB = MBB;
+  FuncInfo->InsertPt = MBB->end();
+    return;
+  }
+
   // Mark exception register as live in.
-  const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
   if (unsigned Reg = TLI->getExceptionPointerRegister())
     FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 0000000..1271f6b
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,679 @@
+//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+STATISTIC(NumSlotsAllocatedForStatepoints,
+          "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+          "Maximum number of stack slots required for a single statepoint");
+
+void
+StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+  // Consistency check
+  assert(PendingGCRelocateCalls.empty() &&
+         "Trying to visit statepoint before finished processing previous one");
+  Locations.clear();
+  RelocLocations.clear();
+  NextSlotToAllocate = 0;
+  // Need to resize this on each safepoint - we need the two to stay in
+  // sync and the clear patterns of a SelectionDAGBuilder have no relation
+  // to FunctionLoweringInfo.
+  AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+  for (size_t i = 0; i < AllocatedStackSlots.size(); i++) {
+    AllocatedStackSlots[i] = false;
+  }
+}
+void StatepointLoweringState::clear() {
+  Locations.clear();
+  RelocLocations.clear();
+  AllocatedStackSlots.clear();
+  assert(PendingGCRelocateCalls.empty() &&
+         "cleared before statepoint sequence completed");
+}
+
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+                                           SelectionDAGBuilder &Builder) {
+
+  NumSlotsAllocatedForStatepoints++;
+
+  // The basic scheme here is to first look for a previously created stack slot
+  // which is not in use (accounting for the fact that arbitrary slots may
+  // already be reserved), or to create a new stack slot and use it.
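// For illustration (hypothetical state, not from this patch): with
// StatepointStackSlots == [FI#3, FI#7] and AllocatedStackSlots ==
// [true, false], the loop below skips slot 0 (already reserved via
// reservePreviousStackSlotForValue), hands out FI#7 for slot 1, and only
// creates a fresh stack temporary once NextSlotToAllocate walks off the end
// of the list.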
+
+  // If this doesn't succeed in 40000 iterations, something is seriously wrong
+  for (int i = 0; i < 40000; i++) {
+    assert(Builder.FuncInfo.StatepointStackSlots.size() ==
+           AllocatedStackSlots.size() &&
+           "broken invariant");
+    const size_t NumSlots = AllocatedStackSlots.size();
+    assert(NextSlotToAllocate <= NumSlots && "broken invariant");
+
+    if (NextSlotToAllocate >= NumSlots) {
+      assert(NextSlotToAllocate == NumSlots);
+      // record stats
+      if (NumSlots + 1 > StatepointMaxSlotsRequired) {
+        StatepointMaxSlotsRequired = NumSlots + 1;
+      }
+
+      SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+      const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+      AllocatedStackSlots.push_back(true);
+      return SpillSlot;
+    }
+    if (!AllocatedStackSlots[NextSlotToAllocate]) {
+      const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+      AllocatedStackSlots[NextSlotToAllocate] = true;
+      return Builder.DAG.getFrameIndex(FI, ValueType);
+    }
+    // Note: We deliberately choose to advance this only on the failing path.
+    // Doing so on the succeeding path involves a bit of complexity that caused
+    // a minor bug previously. Unless performance shows this matters, please
+    // keep this code as simple as possible.
+    NextSlotToAllocate++;
+  }
+  llvm_unreachable("infinite loop?");
+}
+
+/// Try to find existing copies of the incoming values in stack slots used for
+/// statepoint spilling. If we can find a spill slot for the incoming value,
+/// mark that slot as allocated, and reuse the same slot for this safepoint.
+/// This helps to avoid series of loads and stores that only serve to reshuffle
+/// values on the stack between calls.
+static void reservePreviousStackSlotForValue(SDValue Incoming,
+                                             SelectionDAGBuilder &Builder) {
+
+  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
+    // We won't need to spill this, so no need to check for previously
+    // allocated stack slots
+    return;
+  }
+
+  SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+  if (Loc.getNode()) {
+    // duplicates in input
+    return;
+  }
+
+  // Search back for the load from a stack slot pattern to find the original
+  // slot we allocated for this value. We could extend this to deal with
+  // simple modification patterns, but simply dealing with trivial load/store
+  // sequences helps a lot already.
+  if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) {
+    if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) {
+      const int Index = FI->getIndex();
+      auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
+                           Builder.FuncInfo.StatepointStackSlots.end(), Index);
+      if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) {
+        // not one of the lowering stack slots, can't reuse!
+        // TODO: Actually, we probably could reuse the stack slot if the value
+        // hasn't changed at all, but we'd need to look for intervening writes
+        return;
+      } else {
+        // This is one of our dedicated lowering slots
+        const int Offset =
+            std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+        if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+          // stack slot already assigned to someone else, can't use it!
+          // TODO: currently we reserve space for gc arguments after doing
+          // normal allocation for deopt arguments. We should reserve for
+          // _all_ deopt and gc arguments, then start allocating.
+          // This will prevent some moves being inserted when vm state
+          // changes but gc state doesn't between two calls.
+          return;
+        }
+        // Reserve this stack slot
+        Builder.StatepointLowering.reserveStackSlot(Offset);
+      }
+
+      // Cache this slot so we find it when going through the normal
+      // assignment loop.
+      SDValue Loc =
+          Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+
+      Builder.StatepointLowering.setLocation(Incoming, Loc);
+    }
+  }
+
+  // TODO: handle the case where a reloaded value flows through a phi to
+  // another safepoint. e.g.
+  // bb1:
+  //  a' = relocated...
+  // bb2: % pred: bb1, bb3, bb4, etc.
+  //  a_phi = phi(a', ...)
+  // statepoint ... a_phi
+  // NOTE: This will require reasoning about cross basic block values. This is
+  // decidedly non-trivial and this might not be the right place to do it. We
+  // don't really have the information we need here...
+
+  // TODO: handle simple updates. If a value is modified and the original
+  // value is no longer live, it would be nice to put the modified value in the
+  // same slot. This allows folding of the memory accesses for some
+  // instruction types (like an increment).
+  // statepoint (i)
+  // i1 = i+1
+  // statepoint (i1)
+}
+
+/// Remove any duplicates (as SDValues) from the derived pointer pairs. This
+/// is not required for correctness. Its purpose is to reduce the size of
+/// the StackMap section. It has no effect on the number of spill slots
+/// required or the actual lowering.
+static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
+                                   SmallVectorImpl<const Value *> &Ptrs,
+                                   SmallVectorImpl<const Value *> &Relocs,
+                                   SelectionDAGBuilder &Builder) {
+
+  // This is horribly inefficient, but I don't care right now
+  SmallSet<SDValue, 64> Seen;
+
+  SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
+  for (size_t i = 0; i < Ptrs.size(); i++) {
+    SDValue SD = Builder.getValue(Ptrs[i]);
+    // Only add non-duplicates
+    if (Seen.count(SD) == 0) {
+      NewBases.push_back(Bases[i]);
+      NewPtrs.push_back(Ptrs[i]);
+      NewRelocs.push_back(Relocs[i]);
+    }
+    Seen.insert(SD);
+  }
+  assert(Bases.size() >= NewBases.size());
+  assert(Ptrs.size() >= NewPtrs.size());
+  assert(Relocs.size() >= NewRelocs.size());
+  Bases = NewBases;
+  Ptrs = NewPtrs;
+  Relocs = NewRelocs;
+  assert(Ptrs.size() == Bases.size());
+  assert(Ptrs.size() == Relocs.size());
+}
+
+/// Extract the call from the statepoint, lower it, and return a pointer to
+/// the call node. Also update NodeMap so that getValue(statepoint) will
+/// reference the lowered call result.
+static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+                                       SelectionDAGBuilder &Builder) {
+
+  ImmutableCallSite CS(StatepointSite.getCallSite());
+
+  // Lower the actual call itself - This is a bit of a hack, but we want to
+  // avoid modifying the actual lowering code. This is similar in intent to
+  // the LowerCallOperands mechanism used by PATCHPOINT, but is structured
+  // differently. Hopefully, this is slightly more robust w.r.t. calling
+  // convention, return values, and other function attributes.
+  Value *ActualCallee = const_cast<Value *>(StatepointSite.actualCallee());
+
+  std::vector<Value *> Args;
+  CallInst::const_op_iterator arg_begin = StatepointSite.call_args_begin();
+  CallInst::const_op_iterator arg_end = StatepointSite.call_args_end();
+  Args.insert(Args.end(), arg_begin, arg_end);
+  // TODO: remove the creation of a new instruction! We should not be
+  // modifying the IR (even temporarily) at this point.
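// (The temporary never reaches the IR: it is lowered, the dangling NodeMap
// entry is dropped via the removeValue helper this patch adds to
// SelectionDAGBuilder, and the instruction itself is deleted a few lines
// below.)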
+  CallInst *Tmp = CallInst::Create(ActualCallee, Args);
+  Tmp->setTailCall(CS.isTailCall());
+  Tmp->setCallingConv(CS.getCallingConv());
+  Tmp->setAttributes(CS.getAttributes());
+  Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+
+  // Handle the return value of the call, if any.
+  const bool HasDef = !Tmp->getType()->isVoidTy();
+  if (HasDef) {
+    // The value of the statepoint itself will be the value of the call itself.
+    // We'll replace the actual call node shortly. gc_result will grab
+    // this value.
+    Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+  } else {
+    // The token value is never used from here on, just generate a poison value
+    Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
+  }
+  // Remove the fake entry we created so we don't have a hanging reference
+  // after we delete this node.
+  Builder.removeValue(Tmp);
+  delete Tmp;
+  Tmp = nullptr;
+
+  // Search for the call node.
+  // The following code is essentially reverse engineering X86's
+  // LowerCallTo.
+  SDNode *CallNode = nullptr;
+
+  // We just emitted a call, so it should be the last thing generated.
+  SDValue Chain = Builder.DAG.getRoot();
+
+  // Find closest CALLSEQ_END walking back through lowered nodes if needed
+  SDNode *CallEnd = Chain.getNode();
+  int Sanity = 0;
+  while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
+    CallEnd = CallEnd->getGluedNode();
+    assert(CallEnd && "Can not find call node");
+    assert(Sanity < 20 && "should have found call end already");
+    Sanity++;
+  }
+  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+         "Expected a callseq node.");
+  assert(CallEnd->getGluedNode());
+
+  // Step back inside the CALLSEQ
+  CallNode = CallEnd->getGluedNode();
+  return CallNode;
+}
+
+/// Collect all gc pointers coming into the statepoint intrinsic, clean them
+/// up, and return three arrays:
+///   Bases - base pointers incoming to this statepoint
+///   Ptrs - derived pointers incoming to this statepoint
+///   Relocs - the gc_relocate corresponding to each base/ptr pair
+/// Elements of these arrays should be in one-to-one correspondence with each
+/// other, i.e. Bases[i] and Ptrs[i] are from the same gc_relocate call.
+static void
+getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases,
+                              SmallVectorImpl<const Value *> &Ptrs,
+                              SmallVectorImpl<const Value *> &Relocs,
+                              ImmutableStatepoint StatepointSite,
+                              SelectionDAGBuilder &Builder) {
+  for (GCRelocateOperands relocateOpers :
+       StatepointSite.getRelocates(StatepointSite)) {
+    Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
+    Bases.push_back(relocateOpers.basePtr());
+    Ptrs.push_back(relocateOpers.derivedPtr());
+  }
+
+  // Remove any redundant llvm::Values which map to the same SDValue as
+  // another input. Also has the effect of removing duplicates in the original
+  // llvm::Value input list as well. This is a useful optimization for
+  // reducing the size of the StackMap section. It has no other impact.
+  removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder);
+
+  assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size());
+}
+
+/// Spill a value incoming to the statepoint. It might be either part of the
+/// vmstate or the gcstate. In both cases unconditionally spill it on the
+/// stack unless it is a null constant.
Return pair with first element being frame index +/// containing saved value and second element with outgoing chain from the +/// emitted store +static std::pair<SDValue, SDValue> +spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, + SelectionDAGBuilder &Builder) { + SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); + + // Emit new store if we didn't do it for this ptr before + if (!Loc.getNode()) { + Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), + Builder); + assert(isa<FrameIndexSDNode>(Loc)); + int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); + // We use TargetFrameIndex so that isel will not select it into LEA + Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + + // TODO: We can create TokenFactor node instead of + // chaining stores one after another, this may allow + // a bit more optimal scheduling for them + Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, + MachinePointerInfo::getFixedStack(Index), + false, false, 0); + + Builder.StatepointLowering.setLocation(Incoming, Loc); + } + + assert(Loc.getNode()); + return std::make_pair(Loc, Chain); +} + +/// Lower a single value incoming to a statepoint node. This value can be +/// either a deopt value or a gc value, the handling is the same. We special +/// case constants and allocas, then fall back to spilling if required. +static void lowerIncomingStatepointValue(SDValue Incoming, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + SDValue Chain = Builder.getRoot(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { + // If the original value was a constant, make sure it gets recorded as + // such in the stackmap. This is required so that the consumer can + // parse any internal format to the deopt state. It also handles null + // pointers and other constant pointers in GC states + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else { + // Otherwise, locate a spill slot and explicitly spill it so it + // can be found by the runtime later. We currently do not support + // tracking values through callee saved registers to their eventual + // spill location. This would be a useful optimization, but would + // need to be optional since it requires a lot of complexity on the + // runtime side which not all would support. + std::pair<SDValue, SDValue> Res = + spillIncomingStatepointValue(Incoming, Chain, Builder); + Ops.push_back(Res.first); + Chain = Res.second; + } + + Builder.DAG.setRoot(Chain); +} + +/// Lower deopt state and gc pointer arguments of the statepoint. The actual +/// lowering is described in lowerIncomingStatepointValue. This function is +/// responsible for lowering everything in the right position and playing some +/// tricks to avoid redundant stack manipulation where possible. On +/// completion, 'Ops' will contain ready to use operands for machine code +/// statepoint. The chain nodes will have already been created and the DAG root +/// will be set to the last value spilled (if any were). 
+static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, + ImmutableStatepoint StatepointSite, + SelectionDAGBuilder &Builder) { + + // Lower the deopt and gc arguments for this statepoint. Layout will + // be: deopt argument length, deopt arguments.., gc arguments... + + SmallVector<const Value *, 64> Bases, Ptrs, Relocations; + getIncomingStatepointGCValues(Bases, Ptrs, Relocations, + StatepointSite, Builder); + +#ifndef NDEBUG + // Check that each of the gc pointer and bases we've gotten out of the + // safepoint is something the strategy thinks might be a pointer into the GC + // heap. This is basically just here to help catch errors during statepoint + // insertion. TODO: This should actually be in the Verifier, but we can't get + // to the GCStrategy from there (yet). + if (Builder.GFI) { + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); + } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); + } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); + } + } + } +#endif + + + + // Before we actually start lowering (and allocating spill slots for values), + // reserve any stack slots which we judge to be profitable to reuse for a + // particular value. This is purely an optimization over the code below and + // doesn't change semantics at all. It is important for performance that we + // reserve slots for both deopt and gc values before lowering either. + for (auto I = StatepointSite.vm_state_begin() + 1, + E = StatepointSite.vm_state_end(); + I != E; ++I) { + Value *V = *I; + SDValue Incoming = Builder.getValue(V); + reservePreviousStackSlotForValue(Incoming, Builder); + } + for (unsigned i = 0; i < Bases.size() * 2; ++i) { + // Even elements will contain base, odd elements - derived ptr + const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + SDValue Incoming = Builder.getValue(V); + reservePreviousStackSlotForValue(Incoming, Builder); + } + + // First, prefix the list with the number of unique values to be + // lowered. Note that this is the number of *Values* not the + // number of SDValues required to lower them. + const int NumVMSArgs = StatepointSite.numTotalVMSArgs(); + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64)); + + assert(NumVMSArgs + 1 == std::distance(StatepointSite.vm_state_begin(), + StatepointSite.vm_state_end())); + + // The vm state arguments are lowered in an opaque manner. We do + // not know what type of values are contained within. We skip the + // first one since that happens to be the total number we lowered + // explicitly just above. We could have left it in the loop and + // not done it explicitly, but it's far easier to understand this + // way. + for (auto I = StatepointSite.vm_state_begin() + 1, + E = StatepointSite.vm_state_end(); + I != E; ++I) { + const Value *V = *I; + SDValue Incoming = Builder.getValue(V); + lowerIncomingStatepointValue(Incoming, Ops, Builder); + } + + // Finally, go ahead and lower all the gc arguments. There's no prefixed + // length for this one. 
+  // After lowering, we'll have the base and pointer arrays interwoven, with
+  // each (lowered) base pointer immediately followed by its (lowered) derived
+  // pointer, i.e.
+  // (base[0], ptr[0], base[1], ptr[1], ...)
+  for (unsigned i = 0; i < Bases.size() * 2; ++i) {
+    // Even elements will contain base, odd elements - derived ptr
+    const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2];
+    SDValue Incoming = Builder.getValue(V);
+    lowerIncomingStatepointValue(Incoming, Ops, Builder);
+  }
+}
+
+void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
+  // Check some preconditions for sanity
+  assert(isStatepoint(&CI) &&
+         "function called must be the statepoint function");
+
+  LowerStatepoint(ImmutableStatepoint(&CI));
+}
+
+void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+  // The basic scheme here is that information about both the original call and
+  // the safepoint is encoded in the CallInst. We create a temporary call and
+  // lower it, then reverse engineer the calling sequence.
+
+  NumOfStatepoints++;
+  // Clear state
+  StatepointLowering.startNewStatepoint(*this);
+
+  ImmutableCallSite CS(ISP.getCallSite());
+
+#ifndef NDEBUG
+  // Consistency check
+  for (const User *U : CS->users()) {
+    const CallInst *Call = cast<CallInst>(U);
+    if (isGCRelocate(Call))
+      StatepointLowering.scheduleRelocCall(*Call);
+  }
+#endif
+
+#ifndef NDEBUG
+  // If this is a malformed statepoint, report it early to simplify debugging.
+  // This should catch any IR level mistake that's made when constructing or
+  // transforming statepoints.
+  ISP.verify();
+
+  // Check that the associated GCStrategy expects to encounter statepoints.
+  // TODO: This if should become an assert. For now, we allow the GCStrategy
+  // to be optional for backwards compatibility. This will only last a short
+  // period (i.e. a couple of weeks).
+  if (GFI) {
+    assert(GFI->getStrategy().useStatepoints() &&
+           "GCStrategy does not expect to encounter statepoints");
+  }
+#endif
+
+  // Lower statepoint vmstate and gcstate arguments
+  SmallVector<SDValue, 10> LoweredArgs;
+  lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
+
+  // Get the call node; we will replace it later with a statepoint.
+  SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+
+  // Construct the actual STATEPOINT node with all the appropriate arguments
+  // and return values.
+
+  // TODO: Currently, all of these operands are being marked as read/write in
+  // PrologEpilogInserter.cpp, we should special case the VMState arguments
+  // and flags to be read-only.
+  SmallVector<SDValue, 40> Ops;
+
+  // Calculate and push starting position of vmstate arguments
+  // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+  SDValue Glue;
+  if (CallNode->getGluedNode()) {
+    // Glue is always the last operand
+    Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+  }
+  // Get number of arguments incoming directly into call node
+  unsigned NumCallRegArgs =
+      CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3);
+  Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32));
+
+  // Add call target
+  SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
+  Ops.push_back(CallTarget);
+
+  // Add call arguments
+  // Get position of register mask in the call
+  SDNode::op_iterator RegMaskIt;
+  if (Glue.getNode())
+    RegMaskIt = CallNode->op_end() - 2;
+  else
+    RegMaskIt = CallNode->op_end() - 1;
+  Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
+
+  // Add a leading constant argument with the Flags and the calling convention
+  // masked together
+  CallingConv::ID CallConv = CS.getCallingConv();
+  int Flags = dyn_cast<ConstantInt>(CS.getArgument(2))->getZExtValue();
+  assert(Flags == 0 && "not expected to be used");
+  Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+  Ops.push_back(
+      DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64));
+
+  // Insert all vmstate and gcstate arguments
+  Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end());
+
+  // Add register mask from call node
+  Ops.push_back(*RegMaskIt);
+
+  // Add chain
+  Ops.push_back(CallNode->getOperand(0));
+
+  // Same for the glue, but we add it only if the original call had it
+  if (Glue.getNode())
+    Ops.push_back(Glue);
+
+  // Compute return values. Provide a glue output since we consume one as
+  // input. This allows someone else to chain off us as needed.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT,
+                                                getCurSDLoc(), NodeTys, Ops);
+
+  // Replace the original call
+  DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root
+  // Remove the original call node
+  DAG.DeleteNode(CallNode);
+
+  // DON'T set the root - under the assumption that it's already set past the
+  // inserted node we created.
+
+  // TODO: A better future implementation would be to emit a single variable
+  // argument, variable return value STATEPOINT node here and then hookup the
+  // return value of each gc.relocate to the respective output of the
+  // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
+  // to actually be possible today.
+}
+
+void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
+  // The result value of the gc_result is simply the result of the actual
+  // call. We've already emitted this, so just grab the value.
+  Instruction *I = cast<Instruction>(CI.getArgOperand(0));
+  assert(isStatepoint(I) &&
+         "first argument must be a statepoint token");
+
+  setValue(&CI, getValue(I));
+}
+
+void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
+#ifndef NDEBUG
+  // Consistency check
+  StatepointLowering.relocCallVisited(CI);
+#endif
+
+  GCRelocateOperands relocateOpers(&CI);
+  SDValue SD = getValue(relocateOpers.derivedPtr());
+
+  if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) {
+    // We didn't need to spill these special cases (constants and allocas).
+    // See the handling in spillIncomingStatepointValue for detail.
+    setValue(&CI, SD);
+    return;
+  }
+
+  SDValue Loc = StatepointLowering.getRelocLocation(SD);
+  // Emit a new load if we did not emit it before
+  if (!Loc.getNode()) {
+    SDValue SpillSlot = StatepointLowering.getLocation(SD);
+    int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+
+    // Be conservative: flush all pending loads
+    // TODO: Probably we can be less restrictive on this,
+    // it may allow more scheduling opportunities
+    SDValue Chain = getRoot();
+
+    Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain,
+                      SpillSlot, MachinePointerInfo::getFixedStack(FI), false,
+                      false, false, 0);
+
+    StatepointLowering.setRelocLocation(SD, Loc);
+
+    // Again, be conservative, don't emit pending loads
+    DAG.setRoot(Loc.getValue(1));
+  }
+
+  assert(Loc.getNode());
+  setValue(&CI, Loc);
+}
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
new file mode 100644
index 0000000..673112c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,138 @@
+//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include <vector>
+
+namespace llvm {
+class SelectionDAGBuilder;
+
+/// This class tracks both per-statepoint and per-SelectionDAG information.
+/// For each statepoint it tracks the locations of its gc values (incoming and
+/// relocated) and the list of gc_relocate calls scheduled for visiting (this
+/// is used for a debug-mode consistency check only). The spill slot tracking
+/// works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+  StatepointLoweringState() : NextSlotToAllocate(0) {
+  }
+
+  /// Reset all state tracking for a newly encountered safepoint. Also
+  /// performs some consistency checking.
+  void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+  /// Clear the memory usage of this object. This is called from
+  /// SelectionDAGBuilder::clear. We require this is never called in the
+  /// midst of processing a statepoint sequence.
+  void clear();
+
+  /// Returns the spill location of a value incoming to the current
+  /// statepoint. Will return SDValue() if this value hasn't been
+  /// spilled. Otherwise, the value has already been spilled and no
+  /// further action is required by the caller.
+  SDValue getLocation(SDValue val) {
+    if (!Locations.count(val))
+      return SDValue();
+    return Locations[val];
+  }
+  void setLocation(SDValue val, SDValue Location) {
+    assert(!Locations.count(val) &&
+           "Trying to allocate already allocated location");
+    Locations[val] = Location;
+  }
+
+  /// Returns the relocated value for a given input pointer. Will
+  /// return SDValue() if this value hasn't yet been reloaded from
+  /// its stack slot after the statepoint.
+  /// has already been reloaded and the SDValue of that reload will
+  /// be returned. Note that VMState values are spilled but not
+  /// reloaded (since they don't change at the safepoint unless
+  /// also listed in the GC pointer section) and will thus never
+  /// be in this map.
+  SDValue getRelocLocation(SDValue val) {
+    if (!RelocLocations.count(val))
+      return SDValue();
+    return RelocLocations[val];
+  }
+  void setRelocLocation(SDValue val, SDValue Location) {
+    assert(!RelocLocations.count(val) &&
+           "Trying to allocate already allocated location");
+    RelocLocations[val] = Location;
+  }
+
+  /// Record the fact that we expect to encounter a given gc_relocate
+  /// before the next statepoint. If we don't see it, we'll report
+  /// an assertion.
+  void scheduleRelocCall(const CallInst &RelocCall) {
+    PendingGCRelocateCalls.push_back(&RelocCall);
+  }
+  /// Remove this gc_relocate from the list we're expecting to see
+  /// before the next statepoint. If we weren't expecting to see
+  /// it, we'll report an assertion.
+  void relocCallVisited(const CallInst &RelocCall) {
+    SmallVectorImpl<const CallInst *>::iterator itr =
+        std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(),
+                  &RelocCall);
+    assert(itr != PendingGCRelocateCalls.end() &&
+           "Visited unexpected gcrelocate call");
+    PendingGCRelocateCalls.erase(itr);
+  }
+
+  // TODO: Should add consistency tracking to ensure we encounter
+  // expected gc_result calls too.
+
+  /// Get a stack slot we can use to store a value of type ValueType. This
+  /// will hopefully be a recycled slot from another statepoint.
+  SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+  void reserveStackSlot(int Offset) {
+    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+           "out of bounds");
+    assert(!AllocatedStackSlots[Offset] && "already reserved!");
+    assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+    AllocatedStackSlots[Offset] = true;
+  }
+  bool isStackSlotAllocated(int Offset) {
+    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+           "out of bounds");
+    return AllocatedStackSlots[Offset];
+  }
+
+private:
+  /// Maps a pre-relocation value (a gc pointer directly incoming into the
+  /// statepoint) to its location (currently only stack slots)
+  DenseMap<SDValue, SDValue> Locations;
+  /// Maps a pre-relocated value to its new relocated location
+  DenseMap<SDValue, SDValue> RelocLocations;
+
+  /// A boolean indicator for each slot listed in the FunctionInfo as to
+  /// whether it has been used in the current statepoint. Since we try to
+  /// preserve stack slots across safepoints, there can be gaps in which
+  /// slots have been allocated.
+  SmallVector<bool, 50> AllocatedStackSlots;
+
+  /// Points just beyond the last slot known to have been allocated
+  unsigned NextSlotToAllocate;
+
+  /// Keep track of pending gcrelocate calls for the consistency check
+  SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9aef5ed..0a3c926 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
    APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
    // If we only care about the highest bit, don't bother shifting right.
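    // Editorial example, not part of the patch (values illustrative): with
    // BitWidth = 32 and ExVT = i8, demanding only the MSB of
    // (sign_extend_inreg x, i8) lets us emit (shl x, 32 - 8 = 24) instead:
    // bit 7 of x (the i8 sign bit) lands in bit 31, and since no lower result
    // bit is observed, the sign extension itself is unnecessary.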
- if (MsbMask == DemandedMask) { + if (MsbMask == NewMask) { unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); SDValue InOp = Op.getOperand(0); - - // Compute the correct shift amount type, which must be getShiftAmountTy - // for scalar types after legalization. - EVT ShiftAmtTy = Op.getValueType(); - if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); - - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, ShiftAmt)); + unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + bool AlreadySignExtended = + TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, + ShiftAmt)); + } } // Sign extension. Compute the demanded bits in the result that are not @@ -1283,36 +1290,53 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // (zext x) == C --> x == (trunc C) - if (DCI.isBeforeLegalize() && N0->hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // (sext x) == C --> x == (trunc C) + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + DCI.isBeforeLegalize() && N0->hasOneUse()) { unsigned MinBits = N0.getValueSizeInBits(); - SDValue PreZExt; + SDValue PreExt; + bool Signed = false; if (N0->getOpcode() == ISD::ZERO_EXTEND) { // ZExt MinBits = N0->getOperand(0).getValueSizeInBits(); - PreZExt = N0->getOperand(0); + PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); - PreZExt = N0->getOperand(0); + PreExt = N0->getOperand(0); } + } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { + // SExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + Signed = true; } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { - // ZEXTLOAD + // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); - PreZExt = N0; + PreExt = N0; + } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { + Signed = true; + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; } } + // Figure out how many bits we need to preserve this constant. + unsigned ReqdBits = Signed ? + C1.getBitWidth() - C1.getNumSignBits() + 1 : + C1.getActiveBits(); + // Make sure we're not losing bits from the constant. if (MinBits > 0 && - MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) { + MinBits < C1.getBitWidth() && + MinBits >= ReqdBits) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. 
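// Editorial example, not part of the patch (constants invented): for
// (setcc (sext i8 %x to i32), -5, eq), MinBits = 8 and the i32 constant -5
// has 29 sign bits, so ReqdBits = 32 - 29 + 1 = 4 <= 8. The compare is
// therefore safely narrowed to (setcc i8 %x, -5, eq).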
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } @@ -2163,9 +2187,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { +std::pair<unsigned, const TargetRegisterClass *> +TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, + const std::string &Constraint, + MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2177,8 +2202,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2231,8 +2254,9 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// and also tie in the associated operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. -TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( - ImmutableCallSite CS) const { +TargetLowering::AsmOperandInfoVector +TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const { /// ConstraintOperands - Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -2323,7 +2347,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintOperands.size()) { + if (!ConstraintOperands.empty()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2394,12 +2418,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair<unsigned, const TargetRegisterClass*> MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass*> InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0be00f0..b12e943 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -38,416 +38,18 @@ using namespace llvm; #define DEBUG_TYPE "shadowstackgc" namespace { - - class ShadowStackGC : public GCStrategy { - /// RootChain - This is the global linked-list that contains the chain of GC - /// roots. 
- GlobalVariable *Head; - - /// StackEntryTy - Abstract type of a link in the shadow stack. - /// - StructType *StackEntryTy; - StructType *FrameMapTy; - - /// Roots - GC roots in the current function. Each is a pair of the - /// intrinsic call and its corresponding alloca. - std::vector<std::pair<CallInst*,AllocaInst*> > Roots; - - public: - ShadowStackGC(); - - bool initializeCustomLowering(Module &M) override; - bool performCustomLowering(Function &F) override; - - private: - bool IsNullValue(Value *V); - Constant *GetFrameMap(Function &F); - Type* GetConcreteStackEntryType(Function &F); - void CollectRoots(Function &F); - static GetElementPtrInst *CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx1, const char *Name); - static GetElementPtrInst *CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx1, int Idx2, const char *Name); - }; - +class ShadowStackGC : public GCStrategy { +public: + ShadowStackGC(); +}; } static GCRegistry::Add<ShadowStackGC> -X("shadow-stack", "Very portable GC for uncooperative code generators"); - -namespace { - /// EscapeEnumerator - This is a little algorithm to find all escape points - /// from a function so that "finally"-style code can be inserted. In addition - /// to finding the existing return and unwind instructions, it also (if - /// necessary) transforms any call instructions into invokes and sends them to - /// a landing pad. - /// - /// It's wrapped up in a state machine using the same transform C# uses for - /// 'yield return' enumerators, This transform allows it to be non-allocating. - class EscapeEnumerator { - Function &F; - const char *CleanupBBName; - - // State. - int State; - Function::iterator StateBB, StateE; - IRBuilder<> Builder; - - public: - EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} - - IRBuilder<> *Next() { - switch (State) { - default: - return nullptr; - - case 0: - StateBB = F.begin(); - StateE = F.end(); - State = 1; - - case 1: - // Find all 'return', 'resume', and 'unwind' instructions. - while (StateBB != StateE) { - BasicBlock *CurBB = StateBB++; - - // Branches and invokes do not escape, only unwind, resume, and return - // do. - TerminatorInst *TI = CurBB->getTerminator(); - if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) - continue; - - Builder.SetInsertPoint(TI->getParent(), TI); - return &Builder; - } - - State = 2; - - // Find all 'call' instructions. - SmallVector<Instruction*,16> Calls; - for (Function::iterator BB = F.begin(), - E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), - EE = BB->end(); II != EE; ++II) - if (CallInst *CI = dyn_cast<CallInst>(II)) - if (!CI->getCalledFunction() || - !CI->getCalledFunction()->getIntrinsicID()) - Calls.push_back(CI); - - if (Calls.empty()) - return nullptr; + X("shadow-stack", "Very portable GC for uncooperative code generators"); - // Create a cleanup block. 
- LLVMContext &C = F.getContext(); - BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), - Type::getInt32Ty(C), nullptr); - Constant *PersFn = - F.getParent()-> - getOrInsertFunction("__gcc_personality_v0", - FunctionType::get(Type::getInt32Ty(C), true)); - LandingPadInst *LPad = LandingPadInst::Create(ExnTy, PersFn, 1, - "cleanup.lpad", - CleanupBB); - LPad->setCleanup(true); - ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); +void llvm::linkShadowStackGC() {} - // Transform the 'call' instructions into 'invoke's branching to the - // cleanup block. Go in reverse order to make prettier BB names. - SmallVector<Value*,16> Args; - for (unsigned I = Calls.size(); I != 0; ) { - CallInst *CI = cast<CallInst>(Calls[--I]); - - // Split the basic block containing the function call. - BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = - CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); - - // Remove the unconditional branch inserted at the end of CallBB. - CallBB->getInstList().pop_back(); - NewBB->getInstList().remove(CI); - - // Create a new invoke instruction. - Args.clear(); - CallSite CS(CI); - Args.append(CS.arg_begin(), CS.arg_end()); - - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), - NewBB, CleanupBB, - Args, CI->getName(), CallBB); - II->setCallingConv(CI->getCallingConv()); - II->setAttributes(CI->getAttributes()); - CI->replaceAllUsesWith(II); - delete CI; - } - - Builder.SetInsertPoint(RI->getParent(), RI); - return &Builder; - } - } - }; -} - -// ----------------------------------------------------------------------------- - -void llvm::linkShadowStackGC() { } - -ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) { +ShadowStackGC::ShadowStackGC() { InitRoots = true; CustomRoots = true; } - -Constant *ShadowStackGC::GetFrameMap(Function &F) { - // doInitialization creates the abstract type of this value. - Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); - - // Truncate the ShadowStackDescriptor if some metadata is null. - unsigned NumMeta = 0; - SmallVector<Constant*, 16> Metadata; - for (unsigned I = 0; I != Roots.size(); ++I) { - Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); - if (!C->isNullValue()) - NumMeta = I + 1; - Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); - } - Metadata.resize(NumMeta); - - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - - Constant *BaseElts[] = { - ConstantInt::get(Int32Ty, Roots.size(), false), - ConstantInt::get(Int32Ty, NumMeta, false), - }; - - Constant *DescriptorElts[] = { - ConstantStruct::get(FrameMapTy, BaseElts), - ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) - }; - - Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; - StructType *STy = StructType::create(EltTys, "gc_map."+utostr(NumMeta)); - - Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); - - // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems - // that, short of multithreaded LLVM, it should be safe; all that is - // necessary is that a simple Module::iterator loop not be invalidated. - // Appending to the GlobalVariable list is safe in that sense. - // - // All of the output passes emit globals last. The ExecutionEngine - // explicitly supports adding globals to the module after - // initialization. 
- // - // Still, if it isn't deemed acceptable, then this transformation needs - // to be a ModulePass (which means it cannot be in the 'llc' pipeline - // (which uses a FunctionPassManager (which segfaults (not asserts) if - // provided a ModulePass))). - Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, - GlobalVariable::InternalLinkage, - FrameMap, "__gc_" + F.getName()); - - Constant *GEPIndices[2] = { - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), - ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) - }; - return ConstantExpr::getGetElementPtr(GV, GEPIndices); -} - -Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { - // doInitialization creates the generic version of this type. - std::vector<Type*> EltTys; - EltTys.push_back(StackEntryTy); - for (size_t I = 0; I != Roots.size(); I++) - EltTys.push_back(Roots[I].second->getAllocatedType()); - - return StructType::create(EltTys, "gc_stackentry."+F.getName().str()); -} - -/// doInitialization - If this module uses the GC intrinsics, find them now. If -/// not, exit fast. -bool ShadowStackGC::initializeCustomLowering(Module &M) { - // struct FrameMap { - // int32_t NumRoots; // Number of roots in stack frame. - // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. - // void *Meta[]; // May be absent for roots without metadata. - // }; - std::vector<Type*> EltTys; - // 32 bits is ok up to a 32GB stack frame. :) - EltTys.push_back(Type::getInt32Ty(M.getContext())); - // Specifies length of variable length array. - EltTys.push_back(Type::getInt32Ty(M.getContext())); - FrameMapTy = StructType::create(EltTys, "gc_map"); - PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); - - // struct StackEntry { - // ShadowStackEntry *Next; // Caller's stack entry. - // FrameMap *Map; // Pointer to constant FrameMap. - // void *Roots[]; // Stack roots (in-place array, so we pretend). - // }; - - StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); - - EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(StackEntryTy)); - EltTys.push_back(FrameMapPtrTy); - StackEntryTy->setBody(EltTys); - PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); - - // Get the root chain if it already exists. - Head = M.getGlobalVariable("llvm_gc_root_chain"); - if (!Head) { - // If the root chain does not exist, insert a new one with linkonce - // linkage! - Head = new GlobalVariable(M, StackEntryPtrTy, false, - GlobalValue::LinkOnceAnyLinkage, - Constant::getNullValue(StackEntryPtrTy), - "llvm_gc_root_chain"); - } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { - Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); - Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); - } - - return true; -} - -bool ShadowStackGC::IsNullValue(Value *V) { - if (Constant *C = dyn_cast<Constant>(V)) - return C->isNullValue(); - return false; -} - -void ShadowStackGC::CollectRoots(Function &F) { - // FIXME: Account for original alignment. Could fragment the root array. - // Approach 1: Null initialize empty slots at runtime. Yuck. - // Approach 2: Emit a map of the array instead of just a count. 
- - assert(Roots.empty() && "Not cleaned up?"); - - SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) - if (Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::gcroot) { - std::pair<CallInst*, AllocaInst*> Pair = std::make_pair( - CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); - if (IsNullValue(CI->getArgOperand(1))) - Roots.push_back(Pair); - else - MetaRoots.push_back(Pair); - } - - // Number roots with metadata (usually empty) at the beginning, so that the - // FrameMap::Meta array can be elided. - Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); -} - -GetElementPtrInst * -ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, - int Idx, int Idx2, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), - ConstantInt::get(Type::getInt32Ty(Context), Idx), - ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; - Value* Val = B.CreateGEP(BasePtr, Indices, Name); - - assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); - - return dyn_cast<GetElementPtrInst>(Val); -} - -GetElementPtrInst * -ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, - int Idx, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), - ConstantInt::get(Type::getInt32Ty(Context), Idx) }; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); - - assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); - - return dyn_cast<GetElementPtrInst>(Val); -} - -/// runOnFunction - Insert code to maintain the shadow stack. -bool ShadowStackGC::performCustomLowering(Function &F) { - LLVMContext &Context = F.getContext(); - - // Find calls to llvm.gcroot. - CollectRoots(F); - - // If there are no roots in this function, then there is no need to add a - // stack map entry for it. - if (Roots.empty()) - return false; - - // Build the constant map and figure the type of the shadow stack entry. - Value *FrameMap = GetFrameMap(F); - Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); - - // Build the shadow stack entry at the very start of the function. - BasicBlock::iterator IP = F.getEntryBlock().begin(); - IRBuilder<> AtEntry(IP->getParent(), IP); - - Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, - "gc_frame"); - - while (isa<AllocaInst>(IP)) ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); - - // Initialize the map pointer and load the current head of the shadow stack. - Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, - 0,1,"gc_frame.map"); - AtEntry.CreateStore(FrameMap, EntryMapPtr); - - // After all the allocas... - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); - - // And use it in lieu of the alloca. - AllocaInst *OriginalAlloca = Roots[I].second; - SlotPtr->takeName(OriginalAlloca); - OriginalAlloca->replaceAllUsesWith(SlotPtr); - } - - // Move past the original stores inserted by GCStrategy::InitRoots. 
This isn't - // really necessary (the collector would never see the intermediate state at - // runtime), but it's nicer not to push the half-initialized entry onto the - // shadow stack. - while (isa<StoreInst>(IP)) ++IP; - AtEntry.SetInsertPoint(IP->getParent(), IP); - - // Push the entry onto the shadow stack. - Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, - StackEntry,0,0,"gc_frame.next"); - Instruction *NewHeadVal = CreateGEP(Context, AtEntry, - StackEntry, 0, "gc_newhead"); - AtEntry.CreateStore(CurrentHead, EntryNextPtr); - AtEntry.CreateStore(NewHeadVal, Head); - - // For each instruction that escapes... - EscapeEnumerator EE(F, "gc_cleanup"); - while (IRBuilder<> *AtExit = EE.Next()) { - // Pop the entry from the shadow stack. Don't reuse CurrentHead from - // AtEntry, since that would make the value live for the entire function. - Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, - "gc_frame.next"); - Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); - AtExit->CreateStore(SavedHead, Head); - } - - // Delete the original allocas (which are no longer used) and the intrinsic - // calls (which are no longer valid). Doing this last avoids invalidating - // iterators. - for (unsigned I = 0, E = Roots.size(); I != E; ++I) { - Roots[I].first->eraseFromParent(); - Roots[I].second->eraseFromParent(); - } - - Roots.clear(); - return true; -} diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp new file mode 100644 index 0000000..f6393a5 --- /dev/null +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -0,0 +1,457 @@ +//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom lowering code required by the shadow-stack GC +// strategy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +#define DEBUG_TYPE "shadowstackgclowering" + +namespace { + +class ShadowStackGCLowering : public FunctionPass { + /// RootChain - This is the global linked-list that contains the chain of GC + /// roots. + GlobalVariable *Head; + + /// StackEntryTy - Abstract type of a link in the shadow stack. + /// + StructType *StackEntryTy; + StructType *FrameMapTy; + + /// Roots - GC roots in the current function. Each is a pair of the + /// intrinsic call and its corresponding alloca. 
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots; + +public: + static char ID; + ShadowStackGCLowering(); + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + +private: + bool IsNullValue(Value *V); + Constant *GetFrameMap(Function &F); + Type *GetConcreteStackEntryType(Function &F); + void CollectRoots(Function &F); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Value *BasePtr, int Idx1, + const char *Name); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Value *BasePtr, int Idx1, int Idx2, + const char *Name); +}; +} + +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) + +FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } + +char ShadowStackGCLowering::ID = 0; + +ShadowStackGCLowering::ShadowStackGCLowering() + : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr), + FrameMapTy(nullptr) { + initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); +} + +namespace { +/// EscapeEnumerator - This is a little algorithm to find all escape points +/// from a function so that "finally"-style code can be inserted. In addition +/// to finding the existing return and unwind instructions, it also (if +/// necessary) transforms any call instructions into invokes and sends them to +/// a landing pad. +/// +/// It's wrapped up in a state machine using the same transform C# uses for +/// 'yield return' enumerators, This transform allows it to be non-allocating. +class EscapeEnumerator { + Function &F; + const char *CleanupBBName; + + // State. + int State; + Function::iterator StateBB, StateE; + IRBuilder<> Builder; + +public: + EscapeEnumerator(Function &F, const char *N = "cleanup") + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} + + IRBuilder<> *Next() { + switch (State) { + default: + return nullptr; + + case 0: + StateBB = F.begin(); + StateE = F.end(); + State = 1; + + case 1: + // Find all 'return', 'resume', and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = StateBB++; + + // Branches and invokes do not escape, only unwind, resume, and return + // do. + TerminatorInst *TI = CurBB->getTerminator(); + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) + continue; + + Builder.SetInsertPoint(TI->getParent(), TI); + return &Builder; + } + + State = 2; + + // Find all 'call' instructions. + SmallVector<Instruction *, 16> Calls; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE; + ++II) + if (CallInst *CI = dyn_cast<CallInst>(II)) + if (!CI->getCalledFunction() || + !CI->getCalledFunction()->getIntrinsicID()) + Calls.push_back(CI); + + if (Calls.empty()) + return nullptr; + + // Create a cleanup block. 
+ LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = + StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + Constant *PersFn = F.getParent()->getOrInsertFunction( + "__gcc_personality_v0", FunctionType::get(Type::getInt32Ty(C), true)); + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, PersFn, 1, "cleanup.lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector<Value *, 16> Args; + for (unsigned I = Calls.size(); I != 0;) { + CallInst *CI = cast<CallInst>(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = + CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args, + CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(RI->getParent(), RI); + return &Builder; + } + } +}; +} + + +Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector<Constant *, 16> Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + Metadata.resize(NumMeta); + + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + + Constant *BaseElts[] = { + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)}; + + Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()}; + StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta)); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). 
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, FrameMap, + "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; + return ConstantExpr::getGetElementPtr(GV, GEPIndices); +} + +Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector<Type *> EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + + return StructType::create(EltTys, "gc_stackentry." + F.getName().str()); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGCLowering::doInitialization(Module &M) { + bool Active = false; + for (Function &F : M) { + if (F.hasGC() && F.getGC() == std::string("shadow-stack")) { + Active = true; + break; + } + } + if (!Active) + return false; + + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector<Type *> EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + FrameMapTy = StructType::create(EltTys, "gc_map"); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + + StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); + EltTys.push_back(FrameMapPtrTy); + StackEntryTy->setBody(EltTys); + PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable( + M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGCLowering::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGCLowering::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. 
+ + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst *, AllocaInst *> Pair = std::make_pair( + CI, + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, + int Idx, int Idx2, + const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; + Value *Val = B.CreateGEP(BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx)}; + Value *Val = B.CreateGEP(BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGCLowering::runOnFunction(Function &F) { + // Quick exit for functions that do not use the shadow stack GC. + if (!F.hasGC() || + F.getGC() != std::string("shadow-stack")) + return false; + + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = + AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); + + while (isa<AllocaInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = + CreateGEP(Context, AtEntry, StackEntry, 0, 1, "gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. 
+ AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa<StoreInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = + CreateGEP(Context, AtEntry, StackEntry, 0, 0, "gc_frame.next"); + Instruction *NewHeadVal = + CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. + Instruction *EntryNextPtr2 = + CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); + Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + AtExit->CreateStore(SavedHead, Head); + } + + // Delete the original allocas (which are no longer used) and the intrinsic + // calls (which are no longer valid). Doing this last avoids invalidating + // iterators. + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + Roots[I].first->eraseFromParent(); + Roots[I].second->eraseFromParent(); + } + + Roots.clear(); + return true; +} diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 7fd8107..35e4292 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -191,7 +191,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index ea7b914..dab1dfe 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -120,10 +120,9 @@ void SplitAnalysis::analyzeUses() { // First get all the defs from the interval values. This provides the correct // slots for early clobbers. - for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(), - E = CurLI->vni_end(); I != E; ++I) - if (!(*I)->isPHIDef() && !(*I)->isUnused()) - UseSlots.push_back((*I)->def); + for (const VNInfo *VNI : CurLI->valnos) + if (!VNI->isPHIDef() && !VNI->isUnused()) + UseSlots.push_back(VNI->def); // Get use slots form the use-def chain. 
const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -624,8 +623,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { AssignI.setMap(RegAssign); for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - VNInfo *VNI = Copies[i]; - SlotIndex Def = VNI->def; + SlotIndex Def = Copies[i]->def; MachineInstr *MI = LIS.getInstructionFromIndex(Def); assert(MI && "No instruction for back-copy"); @@ -636,13 +634,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { while (!AtBegin && (--MBBI)->isDebugValue()); DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); - LI->removeValNo(VNI); + LIS.removeVRegDefAt(*LI, Def); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); - // Adjust RegAssign if a register assignment is killed at VNI->def. We - // want to avoid calculating the live range of the source register if - // possible. + // Adjust RegAssign if a register assignment is killed at Def. We want to + // avoid calculating the live range of the source register if possible. AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; @@ -727,9 +724,7 @@ void SplitEditor::hoistCopiesForSize() { // Find the nearest common dominator for parent values with multiple // back-copies. If a single back-copy dominates, put it in DomPair.second. - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo *VNI = *VI; + for (VNInfo *VNI : LI->valnos) { if (VNI->isUnused()) continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); @@ -802,9 +797,7 @@ void SplitEditor::hoistCopiesForSize() { // Remove redundant back-copies that are now known to be dominated by another // def with the same value. SmallVector<VNInfo*, 8> BackCopies; - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo *VNI = *VI; + for (VNInfo *VNI : LI->valnos) { if (VNI->isUnused()) continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); @@ -823,16 +816,15 @@ void SplitEditor::hoistCopiesForSize() { bool SplitEditor::transferValues() { bool Skipped = false; RegAssignMap::const_iterator AssignI = RegAssign.begin(); - for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), - ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { - DEBUG(dbgs() << " blit " << *ParentI << ':'); - VNInfo *ParentVNI = ParentI->valno; + for (const LiveRange::Segment &S : Edit->getParent()) { + DEBUG(dbgs() << " blit " << S << ':'); + VNInfo *ParentVNI = S.valno; // RegAssign has holes where RegIdx 0 should be used. - SlotIndex Start = ParentI->start; + SlotIndex Start = S.start; AssignI.advanceTo(Start); do { unsigned RegIdx; - SlotIndex End = ParentI->end; + SlotIndex End = S.end; if (!AssignI.valid()) { RegIdx = 0; } else if (AssignI.start() <= Start) { @@ -917,7 +909,7 @@ bool SplitEditor::transferValues() { ++MBB; } Start = End; - } while (Start != ParentI->end); + } while (Start != S.end); DEBUG(dbgs() << '\n'); } @@ -930,9 +922,7 @@ bool SplitEditor::transferValues() { void SplitEditor::extendPHIKillRanges() { // Extend live ranges to be live-out for successor PHI values. 
- for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), - E = Edit->getParent().vni_end(); I != E; ++I) { - const VNInfo *PHIVNI = *I; + for (const VNInfo *PHIVNI : Edit->getParent().valnos) { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); @@ -1006,12 +996,11 @@ void SplitEditor::deleteRematVictims() { SmallVector<MachineInstr*, 8> Dead; for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ LiveInterval *LI = &LIS.getInterval(*I); - for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); - LII != LIE; ++LII) { + for (const LiveRange::Segment &S : LI->segments) { // Dead defs end at the dead slot. - if (LII->end != LII->valno->def.getDeadSlot()) + if (S.end != S.valno->def.getDeadSlot()) continue; - MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); + MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def); assert(MI && "Missing instruction for dead def"); MI->addRegisterDead(LI->reg, &TRI); @@ -1036,9 +1025,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { // the inserted copies. // Add the original defs from the parent interval. - for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), - E = Edit->getParent().vni_end(); I != E; ++I) { - const VNInfo *ParentVNI = *I; + for (const VNInfo *ParentVNI : Edit->getParent().valnos) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index dcf1b44..faf94b6 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -463,7 +463,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!VI.Var) continue; if (SlotRemap.count(VI.Slot)) { - DEBUG(dbgs()<<"Remapping debug info for ["<<VI.Var->getName()<<"].\n"); + DEBUG(dbgs() << "Remapping debug info for [" + << DIVariable(VI.Var).getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index c2ee87a..767f43a 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -123,5 +123,7 @@ uint32_t *StackMapLiveness::createRegisterMask() const { for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); RI != RE; ++RI) Mask[*RI / 32] |= 1U << (*RI % 32); + + TRI->adjustStackMapLiveOutMask(Mask); return Mask; } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index d3791c3..5d46419 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -84,8 +84,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = - AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits(); + unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); @@ -241,7 +240,7 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // entry. 
const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( MCSymbolRefExpr::Create(MILabel, OutContext), - MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), + MCSymbolRefExpr::Create(AP.CurrentFnSymForSize, OutContext), OutContext); CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), @@ -286,6 +285,18 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { } #endif } +void StackMaps::recordStatepoint(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::STATEPOINT && + "expected statepoint"); + + StatepointOpers opers(&MI); + // Record all the deopt and gc operands (they're contiguous and run from the + // initial index to the end of the operand list) + const unsigned StartIdx = opers.getVarIdx(); + recordStackMapOpers(MI, 0xABCDEF00, + MI.operands_begin() + StartIdx, MI.operands_end(), + false); +} /// Emit the stackmap header. /// diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 45f97ac..d1fae95 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/StackProtector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -86,10 +88,9 @@ bool StackProtector::runOnFunction(Function &Fn) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - TLI = TM->getSubtargetImpl()->getTargetLowering(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); - Attribute Attr = Fn.getAttributes().getAttribute( - AttributeSet::FunctionIndex, "stack-protector-buffer-size"); + Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size"); if (Attr.isStringAttribute() && Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) return false; // Invalid integer string @@ -199,31 +200,24 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { bool StackProtector::RequiresStackProtector() { bool Strong = false; bool NeedsProtector = false; - if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) { + if (F->hasFnAttribute(Attribute::StackProtectReq)) { NeedsProtector = true; Strong = true; // Use the same heuristic as strong to determine SSPLayout - } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) Strong = true; - else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtect)) + else if (!F->hasFnAttribute(Attribute::StackProtect)) return false; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - BasicBlock *BB = I; - - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; - ++II) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { if (AI->isArrayAllocation()) { // SSP-Strong: Enable protectors for any call to alloca, regardless // of size. 
if (Strong) return true; - if (const ConstantInt *CI = - dyn_cast<ConstantInt>(AI->getArraySize())) { + if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. @@ -335,7 +329,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, /// Returns true if the platform/triple supports the stackprotectorcreate pseudo /// node. static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, - const TargetLoweringBase *TLI, const Triple &Trip, + const TargetLoweringBase *TLI, const Triple &TT, AllocaInst *&AI, Value *&StackGuardVar) { bool SupportsSelectionDAGSP = false; PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); @@ -344,9 +338,10 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - StackGuardVar = ConstantExpr::getIntToPtr( - OffsetVal, PointerType::get(PtrTy, AddressSpace)); - } else if (Trip.getOS() == llvm::Triple::OpenBSD) { + StackGuardVar = + ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, + AddressSpace)); + } else if (TT.isOSOpenBSD()) { StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); cast<GlobalValue>(StackGuardVar) ->setVisibility(GlobalValue::HiddenVisibility); @@ -399,14 +394,13 @@ bool StackProtector::InsertStackProtectors() { InsertionPt = RI; // At this point we know that BB has a return statement so it *DOES* // have a terminator. - assert(InsertionPt != nullptr && "BB must have a terminator instruction at " - "this point."); + assert(InsertionPt != nullptr && + "BB must have a terminator instruction at this point."); } Function *Intrinsic = Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); - } else { // If we do not support SelectionDAG based tail calls, generate IR level // tail calls. @@ -459,11 +453,17 @@ bool StackProtector::InsertStackProtectors() { LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); - B.CreateCondBr(Cmp, NewBB, FailBB); + unsigned SuccessWeight = + BranchProbabilityInfo::getBranchWeightStackProtector(true); + unsigned FailureWeight = + BranchProbabilityInfo::getBranchWeightStackProtector(false); + MDNode *Weights = MDBuilder(F->getContext()) + .createBranchWeights(SuccessWeight, FailureWeight); + B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } - // Return if we didn't modify any basic blocks. I.e., there are no return + // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. 
if (!HasPrologue)
    return false;
@@ -477,15 +477,17 @@ BasicBlock *StackProtector::CreateFailBB() {
   LLVMContext &Context = F->getContext();
   BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
   IRBuilder<> B(FailBB);
-  if (Trip.getOS() == llvm::Triple::OpenBSD) {
-    Constant *StackChkFail = M->getOrInsertFunction(
-        "__stack_smash_handler", Type::getVoidTy(Context),
-        Type::getInt8PtrTy(Context), nullptr);
+  if (Trip.isOSOpenBSD()) {
+    Constant *StackChkFail =
+        M->getOrInsertFunction("__stack_smash_handler",
+                               Type::getVoidTy(Context),
+                               Type::getInt8PtrTy(Context), nullptr);
     B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
   } else {
-    Constant *StackChkFail = M->getOrInsertFunction(
-        "__stack_chk_fail", Type::getVoidTy(Context), nullptr);
+    Constant *StackChkFail =
+        M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
+                               nullptr);
     B.CreateCall(StackChkFail);
   }
   B.CreateUnreachable();
diff --git a/lib/CodeGen/StatepointExampleGC.cpp b/lib/CodeGen/StatepointExampleGC.cpp
new file mode 100644
index 0000000..95dfd75
--- /dev/null
+++ b/lib/CodeGen/StatepointExampleGC.cpp
@@ -0,0 +1,55 @@
+//===-- StatepointDefaultGC.cpp - The default statepoint GC strategy ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a GCStrategy which serves as an example for the usage
+// of a statepoint based lowering strategy. This GCStrategy is intended to be
+// suitable as a default implementation usable with any collector which can
+// consume the standard stackmap format generated by statepoints, uses the
+// default address space to distinguish between gc managed and non-gc managed
+// pointers, and has reasonable relocation semantics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+namespace {
+class StatepointGC : public GCStrategy {
+public:
+  StatepointGC() {
+    UseStatepoints = true;
+    // These options are all gc.root specific, we specify them so that the
+    // gc.root lowering code doesn't run.
+    InitRoots = false;
+    NeededSafePoints = 0;
+    UsesMetadata = false;
+    CustomRoots = false;
+  }
+  Optional<bool> isGCManagedPointer(const Value *V) const override {
+    // Method is only valid on pointer typed values.
+    PointerType *PT = cast<PointerType>(V->getType());
+    // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+    // GC managed heap. We know that a pointer into this heap needs to be
+    // updated and that no other pointer does. Note that addrspace(1) is used
+    // only as an example, it has no special meaning, and is not reserved for
+    // GC usage.
+    return (1 == PT->getAddressSpace());
+  }
+};
+}
+
+static GCRegistry::Add<StatepointGC> X("statepoint-example",
+                                       "an example strategy for statepoint");
+
+namespace llvm {
+void linkStatepointExampleGC() {}
+}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 4377236..04b3992 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -560,8 +560,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
   // compensate for the duplication.
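// Editorial sketch, not part of the patch (function and value names are
// invented): opting into the statepoint-example strategy registered above,
// where only addrspace(1) pointers are treated as GC-managed:
//
//   define i8 addrspace(1)* @f(i8 addrspace(1)* %obj) gc "statepoint-example" {
//     ; %obj is GC-managed (isGCManagedPointer returns true) and must be
//     ; relocated across safepoints; plain i8* values are ignored.
//     ...
//   }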
unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 1557d10..e3f0191 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, FrameReg = RI->getFrameRegister(MF); return getFrameIndexOffset(MF, FI); } + +bool TargetFrameLowering::needsFrameIndexResolution( + const MachineFunction &MF) const { + return MF.getFrameInfo()->hasStackObjects(); +} diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index ab45f89..2566c1f 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -307,7 +308,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) { + if (!TM->getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -644,6 +645,28 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return true; } +int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + int FrameSetupOpcode = getCallFrameSetupOpcode(); + int FrameDestroyOpcode = getCallFrameDestroyOpcode(); + + if (MI->getOpcode() != FrameSetupOpcode && + MI->getOpcode() != FrameDestroyOpcode) + return 0; + + int SPAdj = MI->getOperand(0).getImm(); + + if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) + SPAdj = -SPAdj; + + return SPAdj; +} + /// isSchedulingBoundary - Test if the given instruction should be /// considered a scheduling boundary. This primarily includes labels /// and terminators. diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e833fd3..9048a44 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -414,7 +414,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SINCOS_PPCF128] = nullptr; } - if (TT.getOS() != Triple::OpenBSD) { + if (!TT.isOSOpenBSD()) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; } else { // These are generally not available. @@ -696,7 +696,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The TargetMachine owns TLOF. TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) { + : TM(tm), DL(TM.getDataLayout()) { initActions(); // Perform these initializations only once. 
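On the getSPAdjust addition above: it folds the target's stack growth direction into the sign of the returned adjustment. The normalization, extracted as a standalone sketch (names are ours, not LLVM's):

  // Positive result = stack pointer moves toward lower addresses.
  // On a grows-down target, a 16-byte call-frame *setup* yields +16 and the
  // matching *destroy* yields -16; grows-up targets are mirrored.
  static int normalizeSPAdjust(int RawImm, bool IsSetup, bool StackGrowsDown) {
    if ((!StackGrowsDown && IsSetup) || (StackGrowsDown && !IsSetup))
      return -RawImm;
    return RawImm;
  }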
@@ -710,10 +710,12 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; IntDivIsCheap = false; + FsqrtIsCheap = false; Pow2SDivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; + EnableExtLdPromotion = false; HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; @@ -747,37 +749,33 @@ void TargetLoweringBase::initActions() { memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); // Set default actions for various operations. - for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + for (MVT VT : MVT::all_valuetypes()) { // Default all indexed load / store to expand. for (unsigned IM = (unsigned)ISD::PRE_INC; IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { - setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); - setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedLoadAction(IM, VT, Expand); + setIndexedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. - setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, - (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); // These operations default to expand. - setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::FMINNUM, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::FMAXNUM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FGETSIGN, VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMAD, VT, Expand); // These library functions default to expand. - setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); // These operations default to expand for vector types. - if (VT >= MVT::FIRST_VECTOR_VALUETYPE && - VT <= MVT::LAST_VECTOR_VALUETYPE) { - setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, - (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, - (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, - (MVT::SimpleValueType)VT, Expand); + if (VT.isVector()) { + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } } @@ -897,6 +895,138 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { } } +TargetLoweringBase::LegalizeKind +TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { + // If this is a simple type, use the ComputeRegisterProp mechanism. 
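The initActions rewrite above now walks MVT::all_valuetypes() to seed every type with its Expand defaults; a target then overrides the few operations it actually supports and, per the interface change further down, hands its register info to computeRegisterProperties. A hypothetical target constructor fragment (MyTargetLowering and the chosen types are invented):

  MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                     const MySubtarget &ST)
      : TargetLowering(TM) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);  // override Expand
    setOperationAction(ISD::FROUND, MVT::f64, Custom);  // lower it ourselves
    computeRegisterProperties(ST.getRegisterInfo());
  }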
+ if (VT.isSimple()) { + MVT SVT = VT.getSimpleVT(); + assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType)); + MVT NVT = TransformToType[SVT.SimpleTy]; + LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); + + assert((LA == TypeLegal || LA == TypeSoftenFloat || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + "Promote may not follow Expand or Promote"); + + if (LA == TypeSplitVector) + return LegalizeKind(LA, + EVT::getVectorVT(Context, SVT.getVectorElementType(), + SVT.getVectorNumElements() / 2)); + if (LA == TypeScalarizeVector) + return LegalizeKind(LA, SVT.getVectorElementType()); + return LegalizeKind(LA, NVT); + } + + // Handle Extended Scalar Types. + if (!VT.isVector()) { + assert(VT.isInteger() && "Float types must be simple"); + unsigned BitSize = VT.getSizeInBits(); + // First promote to a power-of-two size, then expand if necessary. + if (BitSize < 8 || !isPowerOf2_32(BitSize)) { + EVT NVT = VT.getRoundIntegerType(Context); + assert(NVT != VT && "Unable to round integer VT"); + LegalizeKind NextStep = getTypeConversion(Context, NVT); + // Avoid multi-step promotion. + if (NextStep.first == TypePromoteInteger) + return NextStep; + // Return rounded integer type. + return LegalizeKind(TypePromoteInteger, NVT); + } + + return LegalizeKind(TypeExpandInteger, + EVT::getIntegerVT(Context, VT.getSizeInBits() / 2)); + } + + // Handle vector types. + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + // Vectors with only one element are always scalarized. + if (NumElts == 1) + return LegalizeKind(TypeScalarizeVector, EltVT); + + // Try to widen vector elements until the element type is a power of two and + // promote it to a legal type later on, for example: + // <3 x i8> -> <4 x i8> -> <4 x i32> + if (EltVT.isInteger()) { + // Vectors with a number of elements that is not a power of two are always + // widened, for example <3 x i8> -> <4 x i8>. + if (!VT.isPow2VectorType()) { + NumElts = (unsigned)NextPowerOf2(NumElts); + EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Examine the element type. + LegalizeKind LK = getTypeConversion(Context, EltVT); + + // If type is to be expanded, split the vector. + // <4 x i140> -> <2 x i140> + if (LK.first == TypeExpandInteger) + return LegalizeKind(TypeSplitVector, + EVT::getVectorVT(Context, EltVT, NumElts / 2)); + + // Promote the integer element types until a legal vector type is found + // or until the element integer type is too big. If a legal type was not + // found, fallback to the usual mechanism of widening/splitting the + // vector. + EVT OldEltVT = EltVT; + while (1) { + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits()) + .getRoundIntegerType(Context); + + // Stop trying when getting a non-simple element type. + // Note that vector elements may be greater than legal vector element + // types. Example: X86 XMM registers hold 64bit element on 32bit + // systems. + if (!EltVT.isSimple()) + break; + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + // Found a legal promoted vector type. 
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal) + return LegalizeKind(TypePromoteInteger, + EVT::getVectorVT(Context, EltVT, NumElts)); + } + + // Reset the type to the unexpanded type if we did not find a legal vector + // type with a promoted vector element type. + EltVT = OldEltVT; + } + + // Try to widen the vector until a legal type is found. + // If there is no wider legal type, split the vector. + while (1) { + // Round up to the next power of 2. + NumElts = (unsigned)NextPowerOf2(NumElts); + + // If there is no simple vector type with this many elements then there + // cannot be a larger legal vector type. Note that this assumes that + // there are no skipped intermediate vector types in the simple types. + if (!EltVT.isSimple()) + break; + MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + if (LargerVector == MVT()) + break; + + // If this type is legal then widen the vector. + if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal) + return LegalizeKind(TypeWidenVector, LargerVector); + } + + // Widen odd vectors to next power of two. + if (!VT.isPow2VectorType()) { + EVT NVT = VT.getPow2VectorType(Context); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Vectors with illegal element types are expanded. + EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2); + return LegalizeKind(TypeSplitVector, NVT); +} static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, @@ -992,10 +1122,15 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, // Add a new memory operand for this FI. const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); + + unsigned Flags = MachineMemOperand::MOLoad; + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + Flags |= MachineMemOperand::MOStore; + Flags |= MachineMemOperand::MOVolatile; + } MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, - TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), - MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(FI), Flags, + TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -1009,10 +1144,9 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". -std::pair<const TargetRegisterClass*, uint8_t> -TargetLoweringBase::findRepresentativeClass(MVT VT) const { - const TargetRegisterInfo *TRI = - getTargetMachine().getSubtargetImpl()->getRegisterInfo(); +std::pair<const TargetRegisterClass *, uint8_t> +TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; if (!RC) return std::make_pair(RC, 0); @@ -1038,7 +1172,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const { /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. 
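A worked view of getTypeConversion above: each call answers one legalization step, and the legalizer iterates until it reaches TypeLegal. An illustrative trace, assuming a hypothetical target whose only legal types are i32 and <4 x i32>:

  getTypeConversion(i17)      -> (TypePromoteInteger, i32)      // round to pow-2
  getTypeConversion(i128)     -> (TypeExpandInteger,  i64)      // split in half
  getTypeConversion(<3 x i8>) -> (TypeWidenVector,    <4 x i8>) // widen to pow-2
  getTypeConversion(<4 x i8>) -> (TypePromoteInteger, <4 x i32>)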
-void TargetLoweringBase::computeRegisterProperties() { +void TargetLoweringBase::computeRegisterProperties( + const TargetRegisterInfo *TRI) { static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, "Too many value types for ValueTypeActions to hold!"); @@ -1220,7 +1355,7 @@ void TargetLoweringBase::computeRegisterProperties() { for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { const TargetRegisterClass* RRC; uint8_t Cost; - std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i); RepRegClassForVT[i] = RRC; RepRegClassCostForVT[i] = Cost; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index efd15e1..c1b34f7 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -71,12 +71,10 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, Flags, - SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); + unsigned Size = TM.getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment( - TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -166,9 +164,7 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { return ELF::SHT_PROGBITS; } - -static unsigned -getELFSectionFlags(SectionKind K) { +static unsigned getELFSectionFlags(SectionKind K) { unsigned Flags = 0; if (!K.isMetadata()) @@ -183,9 +179,7 @@ getELFSectionFlags(SectionKind K) { if (K.isThreadLocal()) Flags |= ELF::SHF_TLS; - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) + if (K.isMergeableCString() || K.isMergeableConst()) Flags |= ELF::SHF_MERGE; if (K.isMergeableCString()) @@ -222,120 +216,121 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( } return getContext().getELFSection(SectionName, getELFSectionType(SectionName, Kind), Flags, - Kind, /*EntrySize=*/0, Group); + /*EntrySize=*/0, Group); } -/// getSectionPrefixForGlobal - Return the section prefix name used by options -/// FunctionsSections and DataSections. +/// Return the section prefix name used by options FunctionsSections and +/// DataSections. 
static StringRef getSectionPrefixForGlobal(SectionKind Kind) { - if (Kind.isText()) return ".text."; - if (Kind.isReadOnly()) return ".rodata."; - if (Kind.isBSS()) return ".bss."; - - if (Kind.isThreadData()) return ".tdata."; - if (Kind.isThreadBSS()) return ".tbss."; - - if (Kind.isDataNoRel()) return ".data."; - if (Kind.isDataRelLocal()) return ".data.rel.local."; - if (Kind.isDataRel()) return ".data.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; - + if (Kind.isText()) + return ".text"; + if (Kind.isReadOnly()) + return ".rodata"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadData()) + return ".tdata"; + if (Kind.isThreadBSS()) + return ".tbss"; + if (Kind.isDataNoRel()) + return ".data"; + if (Kind.isDataRelLocal()) + return ".data.rel.local"; + if (Kind.isDataRel()) + return ".data.rel"; + if (Kind.isReadOnlyWithRelLocal()) + return ".data.rel.ro.local"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".data.rel.ro."; + return ".data.rel.ro"; } const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { + unsigned Flags = getELFSectionFlags(Kind); + // If we have -ffunction-section or -fdata-section then we should emit the // global value to a uniqued section specifically for it. - bool EmitUniquedSection; - if (Kind.isText()) - EmitUniquedSection = TM.getFunctionSections(); - else - EmitUniquedSection = TM.getDataSections(); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && - !Kind.isCommon()) { - StringRef Prefix = getSectionPrefixForGlobal(Kind); - - SmallString<128> Name(Prefix); - TM.getNameWithPrefix(Name, GV, Mang, true); - - StringRef Group = ""; - unsigned Flags = getELFSectionFlags(Kind); - if (GV->isWeakForLinker() || GV->hasComdat()) { - if (const Comdat *C = getELFComdat(GV)) - Group = C->getName(); - else - Group = Name.substr(Prefix.size()); - Flags |= ELF::SHF_GROUP; + bool EmitUniqueSection = false; + if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) { + if (Kind.isText()) + EmitUniqueSection = TM.getFunctionSections(); + else + EmitUniqueSection = TM.getDataSections(); + } + EmitUniqueSection |= GV->hasComdat(); + + unsigned EntrySize = 0; + if (Kind.isMergeableCString()) { + if (Kind.isMergeable2ByteCString()) { + EntrySize = 2; + } else if (Kind.isMergeable4ByteCString()) { + EntrySize = 4; + } else { + EntrySize = 1; + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + } + } else if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) { + EntrySize = 4; + } else if (Kind.isMergeableConst8()) { + EntrySize = 8; + } else { + assert(Kind.isMergeableConst16() && "unknown data width"); + EntrySize = 16; } - - return getContext().getELFSection(Name.str(), - getELFSectionType(Name.str(), Kind), - Flags, Kind, 0, Group); } - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { + StringRef Group = ""; + if (const Comdat *C = getELFComdat(GV)) { + Flags |= ELF::SHF_GROUP; + Group = C->getName(); + } + bool UniqueSectionNames = TM.getUniqueSectionNames(); + SmallString<128> Name; + if (Kind.isMergeableCString()) { // We also need alignment here. 
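The rewritten SelectSectionForGlobal above derives mergeable section names from the entry size it just computed. Typical results (global names invented; -ffunction-sections/-fdata-sections assumed where a symbol suffix appears):

  .rodata.str1.1       1-byte C strings, align 1, SHF_MERGE|SHF_STRINGS
  .rodata.cst8         8-byte mergeable constants, SHF_MERGE
  .text.foo            function foo in its own section
  .data.rel.local.bar  uniqued writable data for bar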
// FIXME: this is getting the alignment of the character, not the // alignment of the global! unsigned Align = - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); - - std::string Name = SizeSpec + utostr(Align); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_MERGE | - ELF::SHF_STRINGS, - Kind); + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + Name = SizeSpec + utostr(Align); + } else if (Kind.isMergeableConst()) { + Name = ".rodata.cst"; + Name += utostr(EntrySize); + } else { + Name = getSectionPrefixForGlobal(Kind); } - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const + if (EmitUniqueSection && UniqueSectionNames) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GV, Mang, true); } + return getContext().getELFSection(Name, getELFSectionType(Name, Kind), Flags, + EntrySize, Group, + EmitUniqueSection && !UniqueSectionNames); +} + +const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal. + const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; - if (Kind.isReadOnly()) return ReadOnlySection; - - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; - - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. - if (Kind.isBSS() || Kind.isCommon()) return BSSSection; - - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + return SelectSectionForGlobal(&F, SectionKind::getReadOnly(), Mang, TM); +} - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; +bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + // We can always create relative relocations, so use another section + // that can be marked non-executable. + return false; } /// getSectionForConstant - Given a mergeable constant with the @@ -366,7 +361,6 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, std::string Name; unsigned Type; unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; - SectionKind Kind = SectionKind::getDataRel(); StringRef COMDAT = KeySym ? 
KeySym->getName() : ""; if (KeySym) @@ -398,7 +392,7 @@ static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, Type = ELF::SHT_PROGBITS; } - return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT); + return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT); } const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( @@ -419,16 +413,10 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { if (!UseInitArray) return; - StaticCtorSection = - getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); + StaticCtorSection = getContext().getELFSection( + ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticDtorSection = getContext().getELFSection( + ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); } //===----------------------------------------------------------------------===// @@ -464,14 +452,15 @@ emitModuleFlags(MCStreamer &Streamer, continue; StringRef Key = MFE.Key->getString(); - Value *Val = MFE.Val; + Metadata *Val = MFE.Val; if (Key == "Objective-C Image Info Version") { - VersionVal = cast<ConstantInt>(Val)->getZExtValue(); + VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue(); } else if (Key == "Objective-C Garbage Collection" || Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated") { - ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue(); + Key == "Objective-C Is Simulated" || + Key == "Objective-C Image Swift Version") { + ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue(); } else if (Key == "Objective-C Image Info Section") { SectionVal = cast<MDString>(Val)->getString(); } else if (Key == "Linker Options") { @@ -572,60 +561,6 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( return S; } -bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( - const MCSection &Section) const { - const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); - - // Sections holding 1 byte strings are atomized based on the data - // they contain. - // Sections holding 2 byte strings require symbols in order to be - // atomized. - // There is no dedicated section for 4 byte strings. - if (SMO.getKind().isMergeable1ByteCString()) - return false; - - if (SMO.getSegmentName() == "__TEXT" && - SMO.getSectionName() == "__objc_classname" && - SMO.getType() == MachO::S_CSTRING_LITERALS) - return false; - - if (SMO.getSegmentName() == "__TEXT" && - SMO.getSectionName() == "__objc_methname" && - SMO.getType() == MachO::S_CSTRING_LITERALS) - return false; - - if (SMO.getSegmentName() == "__TEXT" && - SMO.getSectionName() == "__objc_methtype" && - SMO.getType() == MachO::S_CSTRING_LITERALS) - return false; - - if (SMO.getSegmentName() == "__DATA" && - SMO.getSectionName() == "__cfstring") - return false; - - // no_dead_strip sections are not atomized in practice. - if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) - return false; - - switch (SMO.getType()) { - default: - return true; - - // These sections are atomized at the element boundaries without using - // symbols. 
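For the InitializeELF hunk above: with UseInitArray the two constructor sections are now created without the dropped SectionKind argument, and the emitted assembly looks roughly like this (symbol name invented, 64-bit target assumed):

  .section .init_array,"aw",@init_array
  .p2align 3
  .quad _GLOBAL__sub_I_example.cpp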
- case MachO::S_4BYTE_LITERALS: - case MachO::S_8BYTE_LITERALS: - case MachO::S_16BYTE_LITERALS: - case MachO::S_LITERAL_POINTERS: - case MachO::S_NON_LAZY_SYMBOL_POINTERS: - case MachO::S_LAZY_SYMBOL_POINTERS: - case MachO::S_MOD_INIT_FUNC_POINTERS: - case MachO::S_MOD_TERM_FUNC_POINTERS: - case MachO::S_INTERPOSING: - return false; - } -} - const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -648,16 +583,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) + TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -854,7 +787,7 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( unsigned Characteristics = getCOFFSectionFlags(Kind); StringRef Name = GV->getSection(); StringRef COMDATSymName = ""; - if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) { + if (GV->hasComdat()) { Selection = getSelectionForCOFF(GV); const GlobalValue *ComdatGV; if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) @@ -901,12 +834,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, else EmitUniquedSection = TM.getDataSections(); - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - // Section names depend on the name of the symbol which is not feasible if the - // symbol has private linkage. - if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && - !Kind.isCommon()) { + if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); @@ -965,7 +893,7 @@ emitModuleFlags(MCStreamer &Streamer, i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { const Module::ModuleFlagEntry &MFE = *i; StringRef Key = MFE.Key->getString(); - Value *Val = MFE.Val; + Metadata *Val = MFE.Val; if (Key == "Linker Options") { LinkerOptions = cast<MDNode>(Val); break; @@ -982,21 +910,10 @@ emitModuleFlags(MCStreamer &Streamer, MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); - StringRef Op = MDOption->getString(); // Lead with a space for consistency with our dllexport implementation. - std::string Escaped(" "); - if (Op.find(" ") != StringRef::npos) { - // The PE-COFF spec says args with spaces must be quoted. 
It doesn't say - // how to escape quotes, but it probably uses this algorithm: - // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx - // FIXME: Reuse escaping code from Support/Windows/Program.inc - Escaped.push_back('\"'); - Escaped.append(Op); - Escaped.push_back('\"'); - } else { - Escaped.append(Op); - } - Streamer.EmitBytes(Escaped); + std::string Directive(" "); + Directive.append(MDOption->getString()); + Streamer.EmitBytes(Directive); } } } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e218a83..1bbe6e1 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1515,9 +1515,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); MRI = &MF->getRegInfo(); - TII = TM.getSubtargetImpl()->getInstrInfo(); - TRI = TM.getSubtargetImpl()->getRegisterInfo(); - InstrItins = TM.getSubtargetImpl()->getInstrItineraryData(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 7824f92..d393e10 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -88,7 +88,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { DeadBlocks[i]->eraseFromParent(); } - return DeadBlocks.size(); + return !DeadBlocks.empty(); } @@ -204,5 +204,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { F.RenumberBlocks(); - return (DeadBlocks.size() || ModifiedPHI); + return (!DeadBlocks.empty() || ModifiedPHI); } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 0d17d43..7d3b0ce 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -252,20 +252,41 @@ void VirtRegRewriter::addMBBLiveIns() { unsigned PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); - // Scan the segments of LI. - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E; - ++I) { - if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn)) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - if (!LiveIn[i]->isLiveIn(PhysReg)) - LiveIn[i]->addLiveIn(PhysReg); - LiveIn.clear(); + if (LI.hasSubRanges()) { + for (LiveInterval::SubRange &S : LI.subranges()) { + for (const auto &Seg : S.segments) { + if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) + continue; + for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) { + unsigned SubReg = SR.getSubReg(); + unsigned SubRegIndex = SR.getSubRegIndex(); + unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex); + if ((SubRegLaneMask & S.LaneMask) == 0) + continue; + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { + if (!LiveIn[i]->isLiveIn(SubReg)) + LiveIn[i]->addLiveIn(SubReg); + } + } + LiveIn.clear(); + } + } + } else { + // Scan the segments of LI. 
+ for (const auto &Seg : LI.segments) {
+ if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn))
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+ if (!LiveIn[i]->isLiveIn(PhysReg))
+ LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn.clear();
+ }
}
}
}
void VirtRegRewriter::rewrite() {
+ bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
@@ -347,7 +368,8 @@ void VirtRegRewriter::rewrite() {
// A virtual register kill refers to the whole register, so we may
// have to add <imp-use,kill> operands for the super-register. A
// partial redef always kills and redefines the super-register.
- if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ if (NoSubRegLiveness && MO.readsReg()
+ && (MO.isDef() || MO.isKill()))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
@@ -358,10 +380,12 @@ void VirtRegRewriter::rewrite() {
MO.setIsUndef(false);
// Also add implicit defs for the super-register.
- if (MO.isDead())
- SuperDeads.push_back(PhysReg);
- else
- SuperDefs.push_back(PhysReg);
+ if (NoSubRegLiveness) {
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
}
// PhysReg operands cannot have subregister indexes.
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp new file mode 100644 index 0000000..6f712a9 --- /dev/null +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -0,0 +1,626 @@
+//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers LLVM IR exception handling into something closer to what the
+// backend wants. It sniffs the personality function to see which kind of
+// preparation is necessary. If the personality function uses the Itanium LSDA,
+// this pass delegates to the DWARF EH preparation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <memory>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "winehprepare"
+
+namespace {
+
+struct HandlerAllocas {
+ TinyPtrVector<AllocaInst *> Allocas;
+ int ParentFrameAllocationIndex;
+};
+
+// This map is used to model frame variable usage during outlining, to
+// construct a structure type to hold the frame variables in a frame
+// allocation block, and to remap the frame variable allocas (including
+// spill locations as needed) to GEPs that get the variable from the
+// frame allocation structure.
+typedef MapVector<AllocaInst *, HandlerAllocas> FrameVarInfoMap;
+
+class WinEHPrepare : public FunctionPass {
+ std::unique_ptr<FunctionPass> DwarfPrepare;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ WinEHPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID), DwarfPrepare(createDwarfEHPass(TM)) {} + + bool runOnFunction(Function &Fn) override; + + bool doFinalization(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { + return "Windows exception handling preparation"; + } + +private: + bool prepareCPPEHHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + bool outlineCatchHandler(Function *SrcFn, Constant *SelectorType, + LandingPadInst *LPad, CallInst *&EHAlloc, + AllocaInst *&EHObjPtr, FrameVarInfoMap &VarInfo); +}; + +class WinEHFrameVariableMaterializer : public ValueMaterializer { +public: + WinEHFrameVariableMaterializer(Function *OutlinedFn, + FrameVarInfoMap &FrameVarInfo); + ~WinEHFrameVariableMaterializer() {} + + virtual Value *materializeValueFor(Value *V) override; + +private: + FrameVarInfoMap &FrameVarInfo; + IRBuilder<> Builder; +}; + +class WinEHCatchDirector : public CloningDirector { +public: + WinEHCatchDirector(LandingPadInst *LPI, Function *CatchFn, Value *Selector, + Value *EHObj, FrameVarInfoMap &VarInfo) + : LPI(LPI), CurrentSelector(Selector->stripPointerCasts()), EHObj(EHObj), + Materializer(CatchFn, VarInfo), + SelectorIDType(Type::getInt32Ty(LPI->getContext())), + Int8PtrType(Type::getInt8PtrTy(LPI->getContext())) {} + + CloningAction handleInstruction(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + + ValueMaterializer *getValueMaterializer() override { return &Materializer; } + +private: + LandingPadInst *LPI; + Value *CurrentSelector; + Value *EHObj; + WinEHFrameVariableMaterializer Materializer; + Type *SelectorIDType; + Type *Int8PtrType; + + const Value *ExtractedEHPtr; + const Value *ExtractedSelector; + const Value *EHPtrStoreAddr; + const Value *SelectorStoreAddr; +}; +} // end anonymous namespace + +char WinEHPrepare::ID = 0; +INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", + false, false) + +FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { + return new WinEHPrepare(TM); +} + +static bool isMSVCPersonality(EHPersonality Pers) { + return Pers == EHPersonality::MSVC_Win64SEH || + Pers == EHPersonality::MSVC_CXX; +} + +bool WinEHPrepare::runOnFunction(Function &Fn) { + SmallVector<LandingPadInst *, 4> LPads; + SmallVector<ResumeInst *, 4> Resumes; + for (BasicBlock &BB : Fn) { + if (auto *LP = BB.getLandingPadInst()) + LPads.push_back(LP); + if (auto *Resume = dyn_cast<ResumeInst>(BB.getTerminator())) + Resumes.push_back(Resume); + } + + // No need to prepare functions that lack landing pads. + if (LPads.empty()) + return false; + + // Classify the personality to see what kind of preparation we need. + EHPersonality Pers = classifyEHPersonality(LPads.back()->getPersonalityFn()); + + // Delegate through to the DWARF pass if this is unrecognized. + if (!isMSVCPersonality(Pers)) + return DwarfPrepare->runOnFunction(Fn); + + // FIXME: This only returns true if the C++ EH handlers were outlined. + // When that code is complete, it should always return whatever + // prepareCPPEHHandlers returns. + if (Pers == EHPersonality::MSVC_CXX && prepareCPPEHHandlers(Fn, LPads)) + return true; + + // FIXME: SEH Cleanups are unimplemented. Replace them with unreachable. 
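On the personality check in runOnFunction above: classifyEHPersonality keys off the personality symbol, and anything unrecognized still goes down the DWARF path. A minimal sketch of the kind of input that takes the new MSVC C++ route (2015-era IR, where the personality still lives on the landingpad):

  %lp = landingpad { i8*, i32 }
            personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
            catch i8* null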
+ if (Resumes.empty()) + return false; + + for (ResumeInst *Resume : Resumes) { + IRBuilder<>(Resume).CreateUnreachable(); + Resume->eraseFromParent(); + } + + return true; +} + +bool WinEHPrepare::doFinalization(Module &M) { + return DwarfPrepare->doFinalization(M); +} + +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + DwarfPrepare->getAnalysisUsage(AU); +} + +bool WinEHPrepare::prepareCPPEHHandlers( + Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { + // These containers are used to re-map frame variables that are used in + // outlined catch and cleanup handlers. They will be populated as the + // handlers are outlined. + FrameVarInfoMap FrameVarInfo; + SmallVector<CallInst *, 4> HandlerAllocs; + SmallVector<AllocaInst *, 4> HandlerEHObjPtrs; + + bool HandlersOutlined = false; + + for (LandingPadInst *LPad : LPads) { + // Look for evidence that this landingpad has already been processed. + bool LPadHasActionList = false; + BasicBlock *LPadBB = LPad->getParent(); + for (Instruction &Inst : LPadBB->getInstList()) { + // FIXME: Make this an intrinsic. + if (auto *Call = dyn_cast<CallInst>(&Inst)) + if (Call->getCalledFunction()->getName() == "llvm.eh.actions") { + LPadHasActionList = true; + break; + } + } + + // If we've already outlined the handlers for this landingpad, + // there's nothing more to do here. + if (LPadHasActionList) + continue; + + for (unsigned Idx = 0, NumClauses = LPad->getNumClauses(); Idx < NumClauses; + ++Idx) { + if (LPad->isCatch(Idx)) { + // Create a new instance of the handler data structure in the + // HandlerData vector. + CallInst *EHAlloc = nullptr; + AllocaInst *EHObjPtr = nullptr; + bool Outlined = outlineCatchHandler(&F, LPad->getClause(Idx), LPad, + EHAlloc, EHObjPtr, FrameVarInfo); + if (Outlined) { + HandlersOutlined = true; + // These values must be resolved after all handlers have been + // outlined. + if (EHAlloc) + HandlerAllocs.push_back(EHAlloc); + if (EHObjPtr) + HandlerEHObjPtrs.push_back(EHObjPtr); + } + } // End if (isCatch) + } // End for each clause + } // End for each landingpad + + // If nothing got outlined, there is no more processing to be done. + if (!HandlersOutlined) + return false; + + // FIXME: We will replace the landingpad bodies with llvm.eh.actions + // calls and indirect branches here and then delete blocks + // which are no longer reachable. That will get rid of the + // handlers that we have outlined. There is code below + // that looks for allocas with no uses in the parent function. + // That will only happen after the pruning is implemented. + + // Remap the frame variables. + SmallVector<Type *, 2> StructTys; + StructTys.push_back(Type::getInt32Ty(F.getContext())); // EH state + StructTys.push_back(Type::getInt8PtrTy(F.getContext())); // EH object + + // Start the index at two since we always have the above fields at 0 and 1. + int Idx = 2; + + // FIXME: Sort the FrameVarInfo vector by the ParentAlloca size and alignment + // and add padding as necessary to provide the proper alignment. + + // Map the alloca instructions to the corresponding index in the + // frame allocation structure. If any alloca is used only in a single + // handler and is not used in the parent frame after outlining, it will + // be assigned an index of -1, meaning the handler can keep its + // "temporary" alloca and the original alloca can be erased from the + // parent function. If we later encounter this alloca in a second + // handler, we will assign it a place in the frame allocation structure + // at that time. 
Since the instruction replacement doesn't happen until
+ // all the entries in the HandlerData have been processed, this isn't a
+ // problem.
+ for (auto &VarInfoEntry : FrameVarInfo) {
+ AllocaInst *ParentAlloca = VarInfoEntry.first;
+ HandlerAllocas &AllocaInfo = VarInfoEntry.second;
+
+ // If the instruction still has uses in the parent function or if it is
+ // referenced by more than one handler, add it to the frame allocation
+ // structure.
+ if (ParentAlloca->getNumUses() != 0 || AllocaInfo.Allocas.size() > 1) {
+ Type *VarTy = ParentAlloca->getAllocatedType();
+ StructTys.push_back(VarTy);
+ AllocaInfo.ParentFrameAllocationIndex = Idx++;
+ } else {
+ // If the variable is not used in the parent frame and it is only used
+ // in one handler, the alloca can be removed from the parent frame
+ // and the handler will keep its "temporary" alloca to define the value.
+ // An element index of -1 is used to indicate this condition.
+ AllocaInfo.ParentFrameAllocationIndex = -1;
+ }
+ }
+
+ // Having filled the StructTys vector and assigned an index to each element,
+ // we can now create the structure.
+ StructType *EHDataStructTy = StructType::create(
+ F.getContext(), StructTys, "struct." + F.getName().str() + ".ehdata");
+ IRBuilder<> Builder(F.getParent()->getContext());
+
+ // Create a frame allocation.
+ Module *M = F.getParent();
+ LLVMContext &Context = M->getContext();
+ BasicBlock *Entry = &F.getEntryBlock();
+ Builder.SetInsertPoint(Entry->getFirstInsertionPt());
+ Function *FrameAllocFn =
+ Intrinsic::getDeclaration(M, Intrinsic::frameallocate);
+ uint64_t EHAllocSize = M->getDataLayout()->getTypeAllocSize(EHDataStructTy);
+ Value *FrameAllocArgs[] = {
+ ConstantInt::get(Type::getInt32Ty(Context), EHAllocSize)};
+ CallInst *FrameAlloc =
+ Builder.CreateCall(FrameAllocFn, FrameAllocArgs, "frame.alloc");
+
+ Value *FrameEHData = Builder.CreateBitCast(
+ FrameAlloc, EHDataStructTy->getPointerTo(), "eh.data");
+
+ // Now visit each handler that is using the structure and bitcast its EHAlloc
+ // value to be a pointer to the frame alloc structure.
+ DenseMap<Function *, Value *> EHDataMap;
+ for (CallInst *EHAlloc : HandlerAllocs) {
+ // The EHAlloc has no uses at this time, so we need to just insert the
+ // cast before the next instruction. There is always a next instruction.
+ BasicBlock::iterator II = EHAlloc;
+ ++II;
+ Builder.SetInsertPoint(cast<Instruction>(II));
+ Value *EHData = Builder.CreateBitCast(
+ EHAlloc, EHDataStructTy->getPointerTo(), "eh.data");
+ EHDataMap[EHAlloc->getParent()->getParent()] = EHData;
+ }
+
+ // Next, replace the place-holder EHObjPtr allocas with GEP instructions
+ // that pull the EHObjPtr from the frame alloc structure.
+ for (AllocaInst *EHObjPtr : HandlerEHObjPtrs) {
+ Value *EHData = EHDataMap[EHObjPtr->getParent()->getParent()];
+ Builder.SetInsertPoint(EHObjPtr);
+ Value *ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, 1);
+ EHObjPtr->replaceAllUsesWith(ElementPtr);
+ EHObjPtr->removeFromParent();
+ ElementPtr->takeName(EHObjPtr);
+ delete EHObjPtr;
+ }
+
+ // Finally, replace all of the temporary allocas for frame variables used in
+ // the outlined handlers and the original frame allocas with GEP instructions
+ // that get the equivalent pointer from the frame allocation struct.
+ for (auto &VarInfoEntry : FrameVarInfo) {
+ AllocaInst *ParentAlloca = VarInfoEntry.first;
+ HandlerAllocas &AllocaInfo = VarInfoEntry.second;
+ int Idx = AllocaInfo.ParentFrameAllocationIndex;
+
+ // If we have an index of -1 for this instruction, it means it isn't used
+ // outside of this handler. In that case, we just keep the "temporary"
+ // alloca in the handler and erase the original alloca from the parent.
+ if (Idx == -1) {
+ ParentAlloca->eraseFromParent();
+ } else {
+ // Otherwise, we replace the parent alloca and all outlined allocas
+ // which map to it with GEP instructions.
+
+ // First replace the original alloca.
+ Builder.SetInsertPoint(ParentAlloca);
+ Builder.SetCurrentDebugLocation(ParentAlloca->getDebugLoc());
+ Value *ElementPtr =
+ Builder.CreateConstInBoundsGEP2_32(FrameEHData, 0, Idx);
+ ParentAlloca->replaceAllUsesWith(ElementPtr);
+ ParentAlloca->removeFromParent();
+ ElementPtr->takeName(ParentAlloca);
+ delete ParentAlloca;
+
+ // Next replace all outlined allocas that are mapped to it.
+ for (AllocaInst *TempAlloca : AllocaInfo.Allocas) {
+ Value *EHData = EHDataMap[TempAlloca->getParent()->getParent()];
+ // FIXME: Sink this GEP into the blocks where it is used.
+ Builder.SetInsertPoint(TempAlloca);
+ Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
+ ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, Idx);
+ TempAlloca->replaceAllUsesWith(ElementPtr);
+ TempAlloca->removeFromParent();
+ ElementPtr->takeName(TempAlloca);
+ delete TempAlloca;
+ }
+ } // end else of if (Idx == -1)
+ } // End for each FrameVarInfo entry.
+
+ return HandlersOutlined;
+}
+
+bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
+ LandingPadInst *LPad, CallInst *&EHAlloc,
+ AllocaInst *&EHObjPtr,
+ FrameVarInfoMap &VarInfo) {
+ Module *M = SrcFn->getParent();
+ LLVMContext &Context = M->getContext();
+
+ // Create a new function to receive the handler contents.
+ Type *Int8PtrType = Type::getInt8PtrTy(Context);
+ std::vector<Type *> ArgTys;
+ ArgTys.push_back(Int8PtrType);
+ ArgTys.push_back(Int8PtrType);
+ FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
+ Function *CatchHandler = Function::Create(
+ FnType, GlobalVariable::ExternalLinkage, SrcFn->getName() + ".catch", M);
+
+ // Generate a standard prolog to set up the frame recovery structure.
+ IRBuilder<> Builder(Context);
+ BasicBlock *Entry = BasicBlock::Create(Context, "catch.entry");
+ CatchHandler->getBasicBlockList().push_front(Entry);
+ Builder.SetInsertPoint(Entry);
+ Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
+
+ // The outlined handler will be called with the parent's frame pointer as
+ // its second argument. To enable the handler to access variables from
+ // the parent frame, we use that pointer to locate a special block
+ // of memory that was allocated using llvm.frameallocate for this
+ // purpose. During the outlining process we will determine which frame
+ // variables are used in handlers and create a structure that maps these
+ // variables into the frame allocation block.
+ //
+ // The frame allocation block also contains an exception state variable
+ // used by the runtime and a pointer to the exception object pointer
+ // which will be filled in by the runtime for use in the handler.
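The prologue described above pairs llvm.frameallocate in the parent with llvm.framerecover in each outlined handler. In IR form, a minimal sketch (size and names invented):

  ; parent function
  %frame = call i8* @llvm.frameallocate(i32 24)

  ; outlined handler; %fp is the parent frame pointer argument
  %recov = call i8* @llvm.framerecover(
               i8* bitcast (void ()* @parent to i8*), i8* %fp)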
+ Function *RecoverFrameFn =
+ Intrinsic::getDeclaration(M, Intrinsic::framerecover);
+ Value *RecoverArgs[] = {Builder.CreateBitCast(SrcFn, Int8PtrType, ""),
+ &(CatchHandler->getArgumentList().back())};
+ EHAlloc = Builder.CreateCall(RecoverFrameFn, RecoverArgs, "eh.alloc");
+
+ // This alloca is only temporary. We'll be replacing it once we know all the
+ // frame variables that need to go in the frame allocation structure.
+ EHObjPtr = Builder.CreateAlloca(Int8PtrType, 0, "eh.obj.ptr");
+
+ // This will give us a raw pointer to the exception object, which
+ // corresponds to the formal parameter of the catch statement. If the
+ // handler uses this object, we will generate code during the outlining
+ // process to cast the pointer to the appropriate type and dereference it
+ // as necessary. The un-outlined landing pad code represents the
+ // exception object as the result of the llvm.eh.begincatch call.
+ Value *EHObj = Builder.CreateLoad(EHObjPtr, false, "eh.obj");
+
+ ValueToValueMapTy VMap;
+
+ // FIXME: Map other values referenced in the filter handler.
+
+ WinEHCatchDirector Director(LPad, CatchHandler, SelectorType, EHObj, VarInfo);
+
+ SmallVector<ReturnInst *, 8> Returns;
+ ClonedCodeInfo InlinedFunctionInfo;
+
+ BasicBlock::iterator II = LPad;
+
+ CloneAndPruneIntoFromInst(CatchHandler, SrcFn, ++II, VMap,
+ /*ModuleLevelChanges=*/false, Returns, "",
+ &InlinedFunctionInfo,
+ SrcFn->getParent()->getDataLayout(), &Director);
+
+ // Move all the instructions in the first cloned block into our entry block.
+ BasicBlock *FirstClonedBB = std::next(Function::iterator(Entry));
+ Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList());
+ FirstClonedBB->eraseFromParent();
+
+ return true;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Intercept instructions which extract values from the landing pad aggregate.
+ if (auto *Extract = dyn_cast<ExtractValueInst>(Inst)) {
+ if (Extract->getAggregateOperand() == LPI) {
+ assert(Extract->getNumIndices() == 1 &&
+ "Unexpected operation: extracting both landing pad values");
+ assert((*(Extract->idx_begin()) == 0 || *(Extract->idx_begin()) == 1) &&
+ "Unexpected operation: extracting an unknown landing pad element");
+
+ if (*(Extract->idx_begin()) == 0) {
+ // Element 0 doesn't directly correspond to anything in the WinEH
+ // scheme. It will be stored to a memory location, then later loaded,
+ // and finally the loaded value will be used as the argument to an
+ // llvm.eh.begincatch call. We're tracking it here so that we can skip
+ // the store and load.
+ ExtractedEHPtr = Inst;
+ } else {
+ // Element 1 corresponds to the filter selector. We'll map it to 1 for
+ // matching purposes, but it will also probably be stored to memory and
+ // reloaded, so we need to track the instruction so that we can map the
+ // loaded value too.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ ExtractedSelector = Inst;
+ }
+
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+ }
+ // Other extract value instructions just get cloned.
+ return CloningDirector::CloneInstruction;
+ }
+
+ if (auto *Store = dyn_cast<StoreInst>(Inst)) {
+ // Look for and suppress stores of the extracted landingpad values.
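The extractvalue interception above targets the canonical landingpad expansion. Schematically, what the director sees and how it maps it:

  %lp  = landingpad { i8*, i32 } ...
  %exn = extractvalue { i8*, i32 } %lp, 0  ; skipped, tracked as ExtractedEHPtr
  %sel = extractvalue { i8*, i32 } %lp, 1  ; skipped, mapped to i32 1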
+ const Value *StoredValue = Store->getValueOperand();
+ if (StoredValue == ExtractedEHPtr) {
+ EHPtrStoreAddr = Store->getPointerOperand();
+ return CloningDirector::SkipInstruction;
+ }
+ if (StoredValue == ExtractedSelector) {
+ SelectorStoreAddr = Store->getPointerOperand();
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Any other store just gets cloned.
+ return CloningDirector::CloneInstruction;
+ }
+
+ if (auto *Load = dyn_cast<LoadInst>(Inst)) {
+ // Look for loads of (previously suppressed) landingpad values.
+ // The EHPtr load can be ignored (it should only be used as
+ // an argument to llvm.eh.begincatch), but the selector value
+ // needs to be mapped to a constant value of 1 to be used to
+ // simplify the branching to always flow to the current handler.
+ const Value *LoadAddr = Load->getPointerOperand();
+ if (LoadAddr == EHPtrStoreAddr) {
+ VMap[Inst] = UndefValue::get(Int8PtrType);
+ return CloningDirector::SkipInstruction;
+ }
+ if (LoadAddr == SelectorStoreAddr) {
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Any other loads just get cloned.
+ return CloningDirector::CloneInstruction;
+ }
+
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) {
+ // The argument to the call is some form of the first element of the
+ // landingpad aggregate value, but that doesn't matter. It isn't used
+ // here.
+ // The return value of this instruction, however, is used to access the
+ // EH object pointer. We have generated an instruction to get that value
+ // from the EH alloc block, so we can just map to that here.
+ VMap[Inst] = EHObj;
+ return CloningDirector::SkipInstruction;
+ }
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ // It might be interesting to track whether or not we are inside a catch
+ // function, but that might make the algorithm more brittle than it needs
+ // to be.
+
+ // The end catch call can occur in one of two places: either in a
+ // landingpad block that is part of the catch handler's exception
+ // mechanism, or at the end of the catch block. If it occurs in a landing
+ // pad, we must skip it and continue so that the landing pad gets cloned.
+ // FIXME: This case isn't fully supported yet and shouldn't turn up in any
+ // of the test cases until it is.
+ if (IntrinCall->getParent()->isLandingPad())
+ return CloningDirector::SkipInstruction;
+
+ // If an end catch occurs anywhere else, the next instruction should be an
+ // unconditional branch instruction that we want to replace with a return
+ // to the address of the branch target.
+ const BasicBlock *EndCatchBB = IntrinCall->getParent();
+ const TerminatorInst *Terminator = EndCatchBB->getTerminator();
+ const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+ assert(Branch && Branch->isUnconditional());
+ assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
+ BasicBlock::const_iterator(Branch));
+
+ ReturnInst::Create(NewBB->getContext(),
+ BlockAddress::get(Branch->getSuccessor(0)), NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+ }
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+ // This causes a replacement that will collapse the landing pad CFG based
+ // on the filter function we intend to match.
+ if (Selector == CurrentSelector)
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ else
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+ }
+
+ // Continue with the default cloning behavior.
+ return CloningDirector::CloneInstruction;
+}
+
+WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
+ Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo)
+ : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
+ Builder.SetInsertPoint(&OutlinedFn->getEntryBlock());
+ // FIXME: Do something with the FrameVarInfo map so that it is shared across
+ // the function.
+}
+
+Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
+ // If we're asked to materialize an alloca variable, we temporarily
+ // create a matching alloca in the outlined function. When all the
+ // outlining is complete, we'll collect these into a structure and
+ // replace these temporary allocas with GEPs referencing the frame
+ // allocation block.
+ if (auto *AV = dyn_cast<AllocaInst>(V)) {
+ AllocaInst *NewAlloca = Builder.CreateAlloca(
+ AV->getAllocatedType(), AV->getArraySize(), AV->getName());
+ FrameVarInfo[AV].Allocas.push_back(NewAlloca);
+ return NewAlloca;
+ }
+
+// FIXME: Do PHI nodes need special handling?
+
+// FIXME: Are there other cases we can handle better? GEP, ExtractValue, etc.
+
+// FIXME: This doesn't work during cloning because it finds an instruction
+// in the use list that isn't yet part of a basic block.
+#if 0
+ // If we're asked to remap some other instruction, we'll need to
+ // spill it to an alloca variable in the parent function and add a
+ // temporary alloca in the outlined function to be processed as
+ // described above.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst) {
+ AllocaInst *Spill = DemoteRegToStack(*Inst, true);
+ AllocaInst *NewAlloca = Builder.CreateAlloca(Spill->getAllocatedType(),
+ Spill->getArraySize());
+ FrameVarInfo[Spill].Allocas.push_back(NewAlloca);
+ return NewAlloca;
+ }
+#endif
+
+ return nullptr;
+}
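For context on the materializer above: the cloning utilities consult a ValueMaterializer whenever an operand has no VMap entry yet, and a nullptr return falls back to the normal mapping rules. The minimal shape of the hook (2015-era signature, as used in this file; the struct name is invented):

  struct DemoMaterializer : public ValueMaterializer {
    Value *materializeValueFor(Value *V) override {
      // Return a replacement value for the clone, or nullptr to defer
      // to the default remapping.
      return nullptr;
    }
  };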