author    | Pirama Arumuga Nainar <pirama@google.com> | 2015-04-08 08:55:49 -0700
committer | Pirama Arumuga Nainar <pirama@google.com> | 2015-04-09 15:04:38 -0700
commit    | 4c5e43da7792f75567b693105cc53e3f1992ad98 (patch)
tree      | 1b2c9792582e12f5af0b1512e3094425f0dc0df9 /lib/CodeGen
parent    | c75239e6119d0f9a74c57099d91cbc9bde56bf33 (diff)
Update aosp/master llvm for rebase to r233350
Change-Id: I07d935f8793ee8ec6b7da003f6483046594bca49
Diffstat (limited to 'lib/CodeGen')
105 files changed, 4644 insertions(+), 4106 deletions(-)
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index e50b846..8e11fe1 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V,
       // previous aggregate. Combine the two paths to obtain the true address of
       // our element.
       ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
-      std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(),
-                std::back_inserter(ValLoc));
+      ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
       NoopInput = Op;
     }
     // Terminate if we couldn't find anything to look through.
@@ -601,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
     // The manipulations performed when we're looking through an insertvalue or
     // an extractvalue would happen at the front of the RetPath list, so since
     // we have to copy it anyway it's more efficient to create a reversed copy.
-    using std::copy;
-    SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
-    copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
-    copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));
+    SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
+    SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());

     // Finally, we can check whether the value produced by the tail call at this
     // index is compatible with the value we return.
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index ec3cd77..2827d73 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -21,7 +21,6 @@ codegen_SRC_FILES := \
   ExecutionDepsFix.cpp \
   ExpandISelPseudos.cpp \
   ExpandPostRAPseudos.cpp \
-  ForwardControlFlowIntegrity.cpp \
   GCMetadata.cpp \
   GCMetadataPrinter.cpp \
   GCRootLowering.cpp \
@@ -31,7 +30,6 @@ codegen_SRC_FILES := \
   InlineSpiller.cpp \
   InterferenceCache.cpp \
   IntrinsicLowering.cpp \
-  JumpInstrTables.cpp \
   LatencyPriorityQueue.cpp \
   LexicalScopes.cpp \
   LiveDebugVariables.cpp \
@@ -53,6 +51,7 @@ codegen_SRC_FILES := \
   MachineCombiner.cpp \
   MachineCopyPropagation.cpp \
   MachineCSE.cpp \
+  MachineDominanceFrontier.cpp \
   MachineDominators.cpp \
   MachineFunctionAnalysis.cpp \
   MachineFunction.cpp \
@@ -66,6 +65,7 @@ codegen_SRC_FILES := \
   MachineModuleInfoImpls.cpp \
   MachinePassRegistry.cpp \
   MachinePostDominators.cpp \
+  MachineRegionInfo.cpp \
   MachineRegisterInfo.cpp \
   MachineScheduler.cpp \
   MachineSink.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 6fe75ad..9a16e15 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -36,8 +36,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;

-ARMException::ARMException(AsmPrinter *A)
-  : EHStreamer(A), shouldEmitCFI(false) {}
+ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}

 ARMException::~ARMException() {}

@@ -53,13 +52,9 @@ void ARMException::endModule() {
     Asm->OutStreamer.EmitCFISections(false, true);
 }

-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
 void ARMException::beginFunction(const MachineFunction *MF) {
   if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
     getTargetStreamer().emitFnStart();
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
-                                                Asm->getFunctionNumber()));
   // See if we need call frame info.
   AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
   assert(MoveType != AsmPrinter::CFI_M_EH &&
@@ -72,20 +67,12 @@ void ARMException::beginFunction(const MachineFunction *MF) {

 /// endFunction - Gather and emit post-function exception information.
 ///
-void ARMException::endFunction(const MachineFunction *) {
-  if (shouldEmitCFI)
-    Asm->OutStreamer.EmitCFIEndProc();
-
-  // Map all labels and get rid of any dead landing pads.
-  MMI->TidyLandingPads();
-
+void ARMException::endFunction(const MachineFunction *MF) {
   ARMTargetStreamer &ATS = getTargetStreamer();
   if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
       MMI->getLandingPads().empty())
     ATS.emitCantUnwind();
   else {
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
-                                                  Asm->getFunctionNumber()));
     if (!MMI->getLandingPads().empty()) {
       // Emit references to personality.
       if (const Function *Personality = MMI->getPersonality()) {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 988381d..07d6731 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -77,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) {
 /// getGVAlignmentLog2 - Return the alignment to use for the specified global
 /// value in log2 form. This rounds up to the preferred alignment if possible
 /// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
                                    unsigned InBits = 0) {
   unsigned NumBits = 0;
   if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
-    NumBits = TD.getPreferredAlignmentLog(GVar);
+    NumBits = DL.getPreferredAlignmentLog(GVar);

   // If InBits is specified, round it to it.
   if (InBits > NumBits)
@@ -103,12 +103,14 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
 AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
     : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
       OutContext(Streamer->getContext()), OutStreamer(*Streamer.release()),
-      LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+      LastMI(nullptr), LastFn(0), Counter(~0U) {
   DD = nullptr;
   MMI = nullptr;
   LI = nullptr;
   MF = nullptr;
-  CurrentFnSym = CurrentFnSymForSize = nullptr;
+  CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
+  CurrentFnBegin = nullptr;
+  CurrentFnEnd = nullptr;
   GCMetadataPrinters = nullptr;
   VerboseAsm = OutStreamer.isVerboseAsm();
 }
@@ -219,9 +221,13 @@ bool AsmPrinter::doInitialization(Module &M) {

   // Emit module-level inline asm if it exists.
   if (!M.getModuleInlineAsm().empty()) {
+    // We're at the module level. Construct MCSubtarget from the default CPU
+    // and target triple.
+    std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+        TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
     OutStreamer.AddComment("Start of file scope inline assembly");
     OutStreamer.AddBlankLine();
-    EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+    EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI);
     OutStreamer.AddComment("End of file scope inline assembly");
     OutStreamer.AddBlankLine();
   }
@@ -525,7 +531,8 @@ void AsmPrinter::EmitFunctionHeader() {
   EmitVisibility(CurrentFnSym, F->getVisibility());

   EmitLinkage(F, CurrentFnSym);
-  EmitAlignment(MF->getAlignment(), F);
+  if (MAI->hasFunctionAlignment())
+    EmitAlignment(MF->getAlignment(), F);

   if (MAI->hasDotTypeDotSizeDirective())
     OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
@@ -554,6 +561,17 @@ void AsmPrinter::EmitFunctionHeader() {
       OutStreamer.EmitLabel(DeadBlockSyms[i]);
   }

+  if (CurrentFnBegin) {
+    if (MAI->useAssignmentForEHBegin()) {
+      MCSymbol *CurPos = OutContext.CreateTempSymbol();
+      OutStreamer.EmitLabel(CurPos);
+      OutStreamer.EmitAssignment(CurrentFnBegin,
+                                 MCSymbolRefExpr::Create(CurPos, OutContext));
+    } else {
+      OutStreamer.EmitLabel(CurrentFnBegin);
+    }
+  }
+
   // Emit pre-function debug and/or EH information.
   for (const HandlerInfo &HI : Handlers) {
     NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
@@ -764,6 +782,8 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
 /// EmitFunctionBody - This method emits the body and trailer for a
 /// function.
 void AsmPrinter::EmitFunctionBody() {
+  EmitFunctionHeader();
+
   // Emit target-specific gunk before the function body.
   EmitFunctionBodyStart();
@@ -867,32 +887,41 @@ void AsmPrinter::EmitFunctionBody() {
   // Emit target-specific gunk after the function body.
   EmitFunctionBodyEnd();

+  if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+      MAI->hasDotTypeDotSizeDirective()) {
+    // Create a symbol for the end of function.
+    CurrentFnEnd = createTempSymbol("func_end");
+    OutStreamer.EmitLabel(CurrentFnEnd);
+  }
+
   // If the target wants a .size directive for the size of the function, emit
   // it.
   if (MAI->hasDotTypeDotSizeDirective()) {
-    // Create a symbol for the end of function, so we can get the size as
-    // difference between the function label and the temp label.
-    MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
-    OutStreamer.EmitLabel(FnEndLabel);
-
+    // We can get the size as difference between the function label and the
+    // temp label.
     const MCExpr *SizeExp =
-      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext),
                               MCSymbolRefExpr::Create(CurrentFnSymForSize,
                                                       OutContext),
                               OutContext);
     OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
   }

-  // Emit post-function debug and/or EH information.
   for (const HandlerInfo &HI : Handlers) {
     NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
-    HI.Handler->endFunction(MF);
+    HI.Handler->markFunctionEnd();
   }
-  MMI->EndFunction();

   // Print out jump tables referenced by the function.
   EmitJumpTableInfo();

+  // Emit post-function debug and/or EH information.
+  for (const HandlerInfo &HI : Handlers) {
+    NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+    HI.Handler->endFunction(MF);
+  }
+  MMI->EndFunction();
+
   OutStreamer.AddBlankLine();
 }

@@ -928,7 +957,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
   // To be a got equivalent, at least one of its users need to be a constant
   // expression used by another global variable.
   for (auto *U : GV->users())
-    NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U));
+    NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));

   return NumGOTEquivUsers > 0;
 }
@@ -961,17 +990,25 @@ void AsmPrinter::emitGlobalGOTEquivs() {
   if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
     return;

-  while (!GlobalGOTEquivs.empty()) {
-    DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I =
-      GlobalGOTEquivs.begin();
-    const MCSymbol *S = I->first;
-    const GlobalVariable *GV = I->second.first;
-    GlobalGOTEquivs.erase(S);
-    EmitGlobalVariable(GV);
+  SmallVector<const GlobalVariable *, 8> FailedCandidates;
+  for (auto &I : GlobalGOTEquivs) {
+    const GlobalVariable *GV = I.second.first;
+    unsigned Cnt = I.second.second;
+    if (Cnt)
+      FailedCandidates.push_back(GV);
   }
+  GlobalGOTEquivs.clear();
+
+  for (auto *GV : FailedCandidates)
+    EmitGlobalVariable(GV);
 }

 bool AsmPrinter::doFinalization(Module &M) {
+  // Set the MachineFunction to nullptr so that we can catch attempted
+  // accesses to MF specific features at the module level and so that
+  // we can conditionalize accesses based on whether or not it is nullptr.
+  MF = nullptr;
+
   // Gather all GOT equivalent globals in the module. We really need two
   // passes over the globals: one to compute and another to avoid its emission
   // in EmitGlobalVariable, otherwise we would not be able to handle cases
@@ -997,59 +1034,6 @@ bool AsmPrinter::doFinalization(Module &M) {
     EmitVisibility(Name, V, false);
   }

-  // Get information about jump-instruction tables to print.
-  JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
-
-  if (JITI && !JITI->getTables().empty()) {
-    // Since we're at the module level we can't use a function specific
-    // MCSubtargetInfo - instead create one with the module defaults.
-    std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
-        TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
-    unsigned Arch = Triple(getTargetTriple()).getArch();
-    bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
-    const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
-    MCInst TrapInst;
-    TII->getTrap(TrapInst);
-    unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
-
-    // Emit the right section for these functions.
-    OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
-    for (const auto &KV : JITI->getTables()) {
-      uint64_t Count = 0;
-      for (const auto &FunPair : KV.second) {
-        // Emit the function labels to make this be a function entry point.
-        MCSymbol *FunSym =
-          OutContext.GetOrCreateSymbol(FunPair.second->getName());
-        EmitAlignment(LogAlignment);
-        if (IsThumb)
-          OutStreamer.EmitThumbFunc(FunSym);
-        if (MAI->hasDotTypeDotSizeDirective())
-          OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
-        OutStreamer.EmitLabel(FunSym);
-
-        // Emit the jump instruction to transfer control to the original
-        // function.
-        MCInst JumpToFun;
-        MCSymbol *TargetSymbol =
-          OutContext.GetOrCreateSymbol(FunPair.first->getName());
-        const MCSymbolRefExpr *TargetSymRef =
-          MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
-                                  OutContext);
-        TII->getUnconditionalBranch(JumpToFun, TargetSymRef);
-        OutStreamer.EmitInstruction(JumpToFun, *STI);
-        ++Count;
-      }
-
-      // Emit enough padding instructions to fill up to the next power of two.
-      uint64_t Remaining = NextPowerOf2(Count) - Count;
-      for (uint64_t C = 0; C < Remaining; ++C) {
-        EmitAlignment(LogAlignment);
-        OutStreamer.EmitInstruction(TrapInst, *STI);
-      }
-
-    }
-  }
-
   // Emit module flags.
   SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
   M.getModuleFlagsMetadata(ModuleFlags);
@@ -1152,11 +1136,26 @@ bool AsmPrinter::doFinalization(Module &M) {
   return false;
 }

+MCSymbol *AsmPrinter::getCurExceptionSym() {
+  if (!CurExceptionSym)
+    CurExceptionSym = createTempSymbol("exception");
+  return CurExceptionSym;
+}
+
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
   // Get the function symbol.
   CurrentFnSym = getSymbol(MF.getFunction());
   CurrentFnSymForSize = CurrentFnSym;
+  CurrentFnBegin = nullptr;
+  CurExceptionSym = nullptr;
+  bool NeedsLocalForSize = MAI->needsLocalForSize();
+  if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+      NeedsLocalForSize) {
+    CurrentFnBegin = createTempSymbol("func_begin");
+    if (NeedsLocalForSize)
+      CurrentFnSymForSize = CurrentFnBegin;
+  }

   if (isVerbose())
     LI = &getAnalysis<MachineLoopInfo>();
@@ -1273,10 +1272,8 @@ void AsmPrinter::EmitJumpTableInfo() {
   bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
       MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, *F);
-  if (!JTInDiffSection) {
-    OutStreamer.SwitchSection(TLOF.SectionForGlobal(F, *Mang, TM));
-  } else {
-    // Otherwise, drop it in the readonly section.
+  if (JTInDiffSection) {
+    // Drop it in the readonly section.
     const MCSection *ReadOnlySection =
         TLOF.getSectionForJumpTable(*F, *Mang, TM);
     OutStreamer.SwitchSection(ReadOnlySection);
@@ -1585,7 +1582,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
   }

   // Otherwise, emit with .set (aka assignment).
-  MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+  MCSymbol *SetLabel = createTempSymbol("set");
   OutStreamer.EmitAssignment(SetLabel, Diff);
   OutStreamer.EmitSymbolValue(SetLabel, Size);
 }
@@ -1667,8 +1664,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
     // If the code isn't optimized, there may be outstanding folding
     // opportunities. Attempt to fold the expression using DataLayout as a
     // last resort before giving up.
-    if (Constant *C = ConstantFoldConstantExpression(
-            CE, TM.getDataLayout()))
+    if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
       if (C != CE)
         return lowerConstant(C);
@@ -2112,9 +2108,15 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   //
   //  gotpcrelcst := <offset from @foo base> + <cst>
   //
+  // If gotpcrelcst is positive it means that we can safely fold the pc rel
+  // displacement into the GOTPCREL. We can also can have an extra offset <cst>
+  // if the target knows how to encode it.
+  //
   int64_t GOTPCRelCst = Offset + MV.getConstant();
   if (GOTPCRelCst < 0)
     return;
+  if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
+    return;

   // Emit the GOT PC relative to replace the got equivalent global, i.e.:
   //
@@ -2134,18 +2136,16 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   //
   AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
   const GlobalVariable *GV = Result.first;
-  unsigned NumUses = Result.second;
+  int NumUses = (int)Result.second;
   const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
   const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
-  *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
-                                                          GOTPCRelCst);
+  *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
+      FinalSym, MV, Offset, AP.MMI, AP.OutStreamer);

   // Update GOT equivalent usage information
   --NumUses;
-  if (NumUses)
+  if (NumUses >= 0)
     AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
-  else
-    AP.GlobalGOTEquivs.erase(GOTEquivSym);
 }

 static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
@@ -2206,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
     // If the constant expression's size is greater than 64-bits, then we have
     // to emit the value in chunks. Try to constant fold the value and emit it
     // that way.
-    Constant *New = ConstantFoldConstantExpression(CE, DL);
+    Constant *New = ConstantFoldConstantExpression(CE, *DL);
     if (New && New != CE)
       return emitGlobalConstantImpl(New, AP);
   }
@@ -2257,23 +2257,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
 // Symbol Lowering Routines.
 //===----------------------------------------------------------------------===//

-/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
-/// temporary label with the specified stem and unique ID.
-MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name, unsigned ID) const {
-  const DataLayout *DL = TM.getDataLayout();
-  return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
-                                      Name + Twine(ID));
-}
-
-/// GetTempSymbol - Return an assembler temporary label with the specified
-/// stem.
-MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name) const {
-  const DataLayout *DL = TM.getDataLayout();
-  return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
-                                      Name);
+MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
+  return OutContext.createTempSymbol(Name, true);
 }

-
 MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
   return MMI->getAddrLabelSymbol(BA->getBasicBlock());
 }
@@ -2523,3 +2510,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {

 /// Pin vtable to this file.
 AsmPrinterHandler::~AsmPrinterHandler() {}
+
+void AsmPrinterHandler::markFunctionEnd() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d0958c1..9de36da 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,9 +12,12 @@
 //===----------------------------------------------------------------------===//

 #include "ByteStreamer.h"
+#include "DwarfDebug.h"
 #include "DwarfExpression.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -27,29 +30,11 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"

 using namespace llvm;

 #define DEBUG_TYPE "asm-printer"

-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
-  BS.EmitInt8(
-      Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
-                  : dwarf::OperationEncodingString(Op));
-}
-
-void DebugLocDwarfExpression::EmitSigned(int Value) {
-  BS.EmitSLEB128(Value, Twine(Value));
-}
-
-void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) {
-  BS.EmitULEB128(Value, Twine(Value));
-}
-
-bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
-  // This information is not available while emitting .debug_loc entries.
-  return false;
-}
-
 //===----------------------------------------------------------------------===//
 // Dwarf Emission Helper Routines
 //===----------------------------------------------------------------------===//
@@ -178,57 +163,28 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
 ///
 /// SectionLabel is a temporary label emitted at the start of the section that
 /// Label lives in.
-void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
-                                   const MCSymbol *SectionLabel) const {
+void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const {
   // On COFF targets, we have to emit the special .secrel32 directive.
   if (MAI->needsDwarfSectionOffsetDirective()) {
     OutStreamer.EmitCOFFSecRel32(Label);
     return;
   }

-  // Get the section that we're referring to, based on SectionLabel.
-  const MCSection &Section = SectionLabel->getSection();
-
-  // If Label has already been emitted, verify that it is in the same section as
-  // section label for sanity.
-  assert((!Label->isInSection() || &Label->getSection() == &Section) &&
-         "Section offset using wrong section base for label");
-
-  // If the section in question will end up with an address of 0 anyway, we can
-  // just emit an absolute reference to save a relocation.
-  if (Section.isBaseAddressKnownZero()) {
+  // If the format uses relocations with dwarf, refer to the symbol directly.
+  if (MAI->doesDwarfUseRelocationsAcrossSections()) {
     OutStreamer.EmitSymbolValue(Label, 4);
     return;
   }

   // Otherwise, emit it as a label difference from the start of the section.
-  EmitLabelDifference(Label, SectionLabel, 4);
-}
-
-// Some targets do not provide a DWARF register number for every
-// register. This function attempts to emit a DWARF register by
-// emitting a piece of a super-register or by piecing together
-// multiple subregisters that alias the register.
-void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
-                                     const MachineLocation &MLoc,
-                                     unsigned PieceSizeInBits,
-                                     unsigned PieceOffsetInBits) const {
-  assert(MLoc.isReg() && "MLoc must be a register");
-  DebugLocDwarfExpression Expr(*this, Streamer);
-  Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits);
-}
-
-void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer,
-                                  unsigned PieceSizeInBits,
-                                  unsigned PieceOffsetInBits) const {
-  DebugLocDwarfExpression Expr(*this, Streamer);
-  Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
+  EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
 }

 /// EmitDwarfRegOp - Emit dwarf register operation.
 void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
                                 const MachineLocation &MLoc) const {
-  DebugLocDwarfExpression Expr(*this, Streamer);
+  DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(),
+                               getDwarfDebug()->getDwarfVersion(), Streamer);
   const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
   int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
   if (Reg < 0) {
@@ -285,3 +241,60 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
     break;
   }
 }
+
+void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
+  // Get the abbreviation for this DIE.
+  const DIEAbbrev &Abbrev = Die.getAbbrev();
+
+  // Emit the code (index) for the abbreviation.
+  if (isVerbose())
+    OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
+                           "] 0x" + Twine::utohexstr(Die.getOffset()) +
+                           ":0x" + Twine::utohexstr(Die.getSize()) + " " +
+                           dwarf::TagString(Abbrev.getTag()));
+  EmitULEB128(Abbrev.getNumber());
+
+  const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+  // Emit the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    dwarf::Attribute Attr = AbbrevData[i].getAttribute();
+    dwarf::Form Form = AbbrevData[i].getForm();
+    assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+    if (isVerbose()) {
+      OutStreamer.AddComment(dwarf::AttributeString(Attr));
+      if (Attr == dwarf::DW_AT_accessibility)
+        OutStreamer.AddComment(dwarf::AccessibilityString(
+            cast<DIEInteger>(Values[i])->getValue()));
+    }
+
+    // Emit an attribute using the defined form.
+    Values[i]->EmitValue(this, Form);
+  }
+
+  // Emit the DIE children if any.
+  if (Abbrev.hasChildren()) {
+    for (auto &Child : Die.getChildren())
+      emitDwarfDIE(*Child);
+
+    OutStreamer.AddComment("End Of Children Mark");
+    EmitInt8(0);
+  }
+}
+
+void
+AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const {
+  // For each abbrevation.
+  for (const DIEAbbrev *Abbrev : Abbrevs) {
+    // Emit the abbrevations code (base 1 index.)
+    EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+    // Emit the abbreviations data.
+    Abbrev->Emit(this);
+  }
+
+  // Mark end of abbreviations.
+  EmitULEB128(0, "EOM(3)");
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 31867dd..f1efe9d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -41,6 +41,10 @@ public:
   /// call.
   virtual void beginFunction(const MachineFunction *MF) = 0;

+  // \brief Emit any of function marker (like .cfi_endproc). This is called
+  // before endFunction and cannot switch sections.
+  virtual void markFunctionEnd();
+
   /// \brief Gather post-function debug information.
   /// Please note that some AsmPrinter implementations may not call
   /// beginFunction at all.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index e6e7c97..bf63b1b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -73,7 +73,8 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
 }

 /// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+                               const MDNode *LocMDNode,
                                InlineAsm::AsmDialect Dialect) const {
   assert(!Str.empty() && "Can't emit empty inline asm block");

@@ -93,17 +94,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
       !OutStreamer.isIntegratedAssemblerRequired()) {
     emitInlineAsmStart();
     OutStreamer.EmitRawText(Str);
-    // If we have a machine function then grab the MCSubtarget off of that,
-    // otherwise we're at the module level and want to construct one from
-    // the default CPU and target triple.
-    if (MF) {
-      emitInlineAsmEnd(MF->getSubtarget<MCSubtargetInfo>(), nullptr);
-    } else {
-      std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
-          TM.getTargetTriple(), TM.getTargetCPU(),
-          TM.getTargetFeatureString()));
-      emitInlineAsmEnd(*STI, nullptr);
-    }
+    emitInlineAsmEnd(STI, nullptr);
     return;
   }

@@ -135,19 +126,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
   std::unique_ptr<MCAsmParser> Parser(
       createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));

-  // Initialize the parser with a fresh subtarget info. It is better to use a
-  // new STI here because the parser may modify it and we do not want those
-  // modifications to persist after parsing the inlineasm. The modifications
-  // made by the parser will be seen by the code emitters because it passes
-  // the current STI down to the EncodeInstruction() method.
-  std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
-      TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
-
-  // Preserve a copy of the original STI because the parser may modify it. For
-  // example, when switching between arm and thumb mode. If the target needs to
-  // emit code to return to the original state it can do so in
+  // Create a temporary copy of the original STI because the parser may modify
+  // it. For example, when switching between arm and thumb mode. If the target
+  // needs to emit code to return to the original state it can do so in
   // emitInlineAsmEnd().
-  MCSubtargetInfo STIOrig = *STI;
+  MCSubtargetInfo TmpSTI = STI;

   // We create a new MCInstrInfo here since we might be at the module level
   // and not have a MachineFunction to initialize the TargetInstrInfo from and
   // we only need MCInstrInfo for asm parsing. We create one unconditionally
   // because it's not subtarget dependent.
   std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
   std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
-      *STI, *Parser, *MII, TM.Options.MCOptions));
+      TmpSTI, *Parser, *MII, TM.Options.MCOptions));
   if (!TAP)
     report_fatal_error("Inline asm not supported by this streamer because"
                        " we don't have an asm parser for this target\n");
@@ -170,7 +153,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
   // Don't implicitly switch to the text section before the asm.
   int Res = Parser->Run(/*NoInitialTextSection*/ true,
                         /*NoFinalize*/ true);
-  emitInlineAsmEnd(STIOrig, STI.get());
+  emitInlineAsmEnd(STI, &TmpSTI);
   if (Res && !HasDiagHandler)
     report_fatal_error("Error parsing inline asm\n");
 }
@@ -505,7 +488,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
   else
     EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);

-  EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
+  EmitInlineAsm(OS.str(), getSubtargetInfo(), LocMD, MI->getInlineAsmDialect());

   // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
   // enabled, so we use emitRawComment.
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 42be114..179a4d4 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -19,6 +19,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include <string>

 namespace llvm {
 class ByteStreamer {
@@ -66,6 +68,33 @@ class HashingByteStreamer : public ByteStreamer {
     Hash.addULEB128(DWord);
   }
 };
+
+class BufferByteStreamer : public ByteStreamer {
+private:
+  SmallVectorImpl<char> &Buffer;
+  // FIXME: This is actually only needed for textual asm output.
+  SmallVectorImpl<std::string> &Comments;
+
+public:
+  BufferByteStreamer(SmallVectorImpl<char> &Buffer,
+                     SmallVectorImpl<std::string> &Comments)
+      : Buffer(Buffer), Comments(Comments) {}
+  void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+    Buffer.push_back(Byte);
+    Comments.push_back(Comment.str());
+  }
+  void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+    raw_svector_ostream OSE(Buffer);
+    encodeSLEB128(DWord, OSE);
+    Comments.push_back(Comment.str());
+  }
+  void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+    raw_svector_ostream OSE(Buffer);
+    encodeULEB128(DWord, OSE);
+    Comments.push_back(Comment.str());
+  }
+};
+
 }

 #endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 64ba56b..1a706f7 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;

 //===----------------------------------------------------------------------===//
@@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {

 /// Emit - Print the abbreviation using the specified asm printer.
 ///
-void DIEAbbrev::Emit(AsmPrinter *AP) const {
+void DIEAbbrev::Emit(const AsmPrinter *AP) const {
   // Emit its Dwarf tag type.
   AP->EmitULEB128(Tag, dwarf::TagString(Tag));

@@ -204,7 +205,7 @@ void DIEValue::dump() const {

 /// EmitValue - Emit integer of appropriate size.
 ///
-void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   unsigned Size = ~0U;
   switch (Form) {
   case dwarf::DW_FORM_flag_present:
@@ -218,6 +219,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_ref2:  // Fall thru
   case dwarf::DW_FORM_data2: Size = 2; break;
   case dwarf::DW_FORM_sec_offset: // Fall thru
+  case dwarf::DW_FORM_strp: // Fall thru
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: Size = 4; break;
   case dwarf::DW_FORM_ref8:  // Fall thru
@@ -229,6 +231,9 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
   case dwarf::DW_FORM_addr:
     Size = Asm->getDataLayout().getPointerSize(); break;
+  case dwarf::DW_FORM_ref_addr:
+    Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
+    break;
   default: llvm_unreachable("DIE Value form not supported yet");
   }
   Asm->OutStreamer.EmitIntValue(Integer, Size);
@@ -236,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {

 /// SizeOf - Determine size of integer value in bytes.
 ///
-unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   switch (Form) {
   case dwarf::DW_FORM_flag_present: return 0;
   case dwarf::DW_FORM_flag:  // Fall thru
@@ -245,6 +250,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_ref2:  // Fall thru
   case dwarf::DW_FORM_data2: return sizeof(int16_t);
   case dwarf::DW_FORM_sec_offset: // Fall thru
+  case dwarf::DW_FORM_strp: // Fall thru
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: return sizeof(int32_t);
   case dwarf::DW_FORM_ref8:  // Fall thru
@@ -255,6 +261,10 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
   case dwarf::DW_FORM_addr:  return AP->getDataLayout().getPointerSize();
+  case dwarf::DW_FORM_ref_addr:
+    if (AP->OutStreamer.getContext().getDwarfVersion() == 2)
+      return AP->getDataLayout().getPointerSize();
+    return sizeof(int32_t);
   default: llvm_unreachable("DIE Value form not supported yet");
   }
 }
@@ -272,13 +282,13 @@ void DIEInteger::print(raw_ostream &O) const {

 /// EmitValue - Emit expression value.
 ///
-void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form));
 }

 /// SizeOf - Determine size of expression value in bytes.
 ///
-unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
@@ -298,7 +308,7 @@ void DIEExpr::print(raw_ostream &O) const {

 /// EmitValue - Emit label value.
 ///
-void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   AP->EmitLabelReference(Label, SizeOf(AP, Form),
                          Form == dwarf::DW_FORM_strp ||
                          Form == dwarf::DW_FORM_sec_offset ||
@@ -307,7 +317,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {

 /// SizeOf - Determine size of label value in bytes.
 ///
-unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
@@ -326,13 +336,13 @@ void DIELabel::print(raw_ostream &O) const {

 /// EmitValue - Emit delta value.
 ///
-void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
 }

 /// SizeOf - Determine size of delta value in bytes.
 ///
-unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
@@ -351,13 +361,13 @@ void DIEDelta::print(raw_ostream &O) const {

 /// EmitValue - Emit string value.
 ///
-void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   Access->EmitValue(AP, Form);
 }

 /// SizeOf - Determine size of delta value in bytes.
 ///
-unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   return Access->SizeOf(AP, Form);
 }

@@ -372,32 +382,9 @@ void DIEString::print(raw_ostream &O) const {
 // DIEEntry Implementation
 //===----------------------------------------------------------------------===//

-/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
-/// directive is specified by Size and Hi/Lo specify the labels.
-static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi,
-                                      uint64_t Offset, const MCSymbol *Lo,
-                                      unsigned Size) {
-  MCContext &Context = Streamer.getContext();
-
-  // Emit Hi+Offset - Lo
-  // Get the Hi+Offset expression.
-  const MCExpr *Plus =
-      MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context),
-                              MCConstantExpr::Create(Offset, Context), Context);
-
-  // Get the Hi+Offset-Lo expression.
-  const MCExpr *Diff = MCBinaryExpr::CreateSub(
-      Plus, MCSymbolRefExpr::Create(Lo, Context), Context);
-
-  // Otherwise, emit with .set (aka assignment).
-  MCSymbol *SetLabel = Context.CreateTempSymbol();
-  Streamer.EmitAssignment(SetLabel, Diff);
-  Streamer.EmitSymbolValue(SetLabel, Size);
-}
-
 /// EmitValue - Emit debug information entry offset.
 ///
-void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {

   if (Form == dwarf::DW_FORM_ref_addr) {
     const DwarfDebug *DD = AP->getDwarfDebug();
@@ -413,14 +400,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
       AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
                               DIEEntry::getRefAddrSize(AP));
     else
-      emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr,
-                                CU->getSectionSym(),
-                                DIEEntry::getRefAddrSize(AP));
+      AP->OutStreamer.EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
   } else
     AP->EmitInt32(Entry.getOffset());
 }

-unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
+unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
   // DWARF4: References that use the attribute form DW_FORM_ref_addr are
   // specified to be four bytes in the DWARF 32-bit format and eight bytes
   // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
@@ -441,7 +426,7 @@ void DIEEntry::print(raw_ostream &O) const {
 //===----------------------------------------------------------------------===//
 // DIETypeSignature Implementation
 //===----------------------------------------------------------------------===//
-void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIETypeSignature::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   assert(Form == dwarf::DW_FORM_ref_sig8);
   Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8);
 }
@@ -460,7 +445,7 @@ void DIETypeSignature::dump() const { print(dbgs()); }

 /// ComputeSize - calculate the size of the location expression.
 ///
-unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
+unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
     const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
     for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -472,7 +457,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {

 /// EmitValue - Emit location data.
 ///
-void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   switch (Form) {
   default: llvm_unreachable("Improper form for block");
   case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break;
@@ -490,7 +475,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {

 /// SizeOf - Determine size of location data in bytes.
 ///
-unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   switch (Form) {
   case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -515,7 +500,7 @@ void DIELoc::print(raw_ostream &O) const {

 /// ComputeSize - calculate the size of the block.
 ///
-unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
+unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
     const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
     for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -527,7 +512,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {

 /// EmitValue - Emit block data.
 ///
-void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   switch (Form) {
   default: llvm_unreachable("Improper form for block");
   case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break;
@@ -543,7 +528,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {

 /// SizeOf - Determine size of block data in bytes.
 ///
-unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   switch (Form) {
   case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -564,7 +549,7 @@ void DIEBlock::print(raw_ostream &O) const {
 // DIELocList Implementation
 //===----------------------------------------------------------------------===//

-unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4)
     return 4;
   if (Form == dwarf::DW_FORM_sec_offset)
@@ -574,14 +559,14 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {

 /// EmitValue - Emit label value.
 ///
-void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   DwarfDebug *DD = AP->getDwarfDebug();
   MCSymbol *Label = DD->getDebugLocEntries()[Index].Label;

   if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf())
-    AP->EmitSectionOffset(Label, DD->getDebugLocSym());
+    AP->emitSectionOffset(Label);
   else
-    AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4);
+    AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
 }

 #ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 1e2ba2c..da7252a 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
   // ... take the least significant 8 bytes and return those. Our MD5
   // implementation always returns its results in little endian, swap bytes
   // appropriately.
-  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+  return support::endian::read64le(Result + 8);
 }

 /// This is based on the type signature computation given in section 7.27 of the
@@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
   // ... take the least significant 8 bytes and return those. Our MD5
   // implementation always returns its results in little endian, swap bytes
   // appropriately.
-  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+  return support::endian::read64le(Result + 8);
 }

 /// This is based on the type signature computation given in section 7.27 of the
@@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
   // ... take the least significant 8 bytes and return those. Our MD5
   // implementation always returns its results in little endian, swap bytes
   // appropriately.
-  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+  return support::endian::read64le(Result + 8);
 }
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 0c2a5e5..bbdf237 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 #include <map>
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 6d55c03..6914bbe 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,22 +9,24 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H

+#include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MachineLocation.h"

 namespace llvm {
+class AsmPrinter;
 class MDNode;
 /// \brief This struct describes location entries emitted in the .debug_loc
 /// section.
 class DebugLocEntry {
-  // Begin and end symbols for the address range that this location is valid.
+  /// Begin and end symbols for the address range that this location is valid.
   const MCSymbol *Begin;
   const MCSymbol *End;

 public:
-  /// A single location or constant.
+  /// \brief A single location or constant.
   struct Value {
     Value(const MDNode *Var, const MDNode *Expr, int64_t i)
         : Variable(Var), Expression(Expr), EntryKind(E_Integer) {
@@ -41,20 +43,20 @@ public:
     Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
         : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
       assert(DIVariable(Var).Verify());
-      assert(DIExpression(Expr).Verify());
+      assert(DIExpression(Expr)->isValid());
     }

-    // The variable to which this location entry corresponds.
+    /// The variable to which this location entry corresponds.
     const MDNode *Variable;

-    // Any complex address location expression for this Value.
+    /// Any complex address location expression for this Value.
     const MDNode *Expression;

-    // Type of entry that this represents.
+    /// Type of entry that this represents.
     enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
     enum EntryType EntryKind;

-    // Either a constant,
+    /// Either a constant,
     union {
       int64_t Int;
       const ConstantFP *CFP;
@@ -84,6 +86,8 @@ private:
   /// A nonempty list of locations/constants belonging to this entry,
   /// sorted by offset.
   SmallVector<Value, 1> Values;
+  SmallString<8> DWARFBytes;
+  SmallVector<std::string, 1> Comments;

 public:
   DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
@@ -92,9 +96,9 @@ public:
   }

   /// \brief If this and Next are describing different pieces of the same
-  // variable, merge them by appending Next's values to the current
-  // list of values.
-  // Return true if the merge was successful.
+  /// variable, merge them by appending Next's values to the current
+  /// list of values.
+  /// Return true if the merge was successful.
   bool MergeValues(const DebugLocEntry &Next) {
     if (Begin == Next.Begin) {
       DIExpression Expr(Values[0].Expression);
@@ -135,7 +139,7 @@ public:
            }) && "value must be a piece");
   }

-  // Sort the pieces by offset.
+  // \brief Sort the pieces by offset.
   // Remove any duplicate entries by dropping all but the first.
   void sortUniqueValues() {
     std::sort(Values.begin(), Values.end());
     Values.erase(std::unique(Values.begin(), Values.end(),
                  }),
                  Values.end());
   }
+
+  /// \brief Lower this entry into a DWARF expression.
+  void finalize(const AsmPrinter &AP,
+                const DITypeIdentifierMap &TypeIdentifierMap);
+
+  /// \brief Return the lowered DWARF expression.
+  StringRef getDWARFBytes() const { return DWARFBytes; }
+  /// \brief Return the assembler comments for the lowered DWARF expression.
+  const SmallVectorImpl<std::string> &getComments() const { return Comments; }
 };

-/// Compare two Values for equality.
+/// \brief Compare two Values for equality.
 inline bool operator==(const DebugLocEntry::Value &A,
                        const DebugLocEntry::Value &B) {
   if (A.EntryKind != B.EntryKind)
@@ -173,7 +186,7 @@ inline bool operator==(const DebugLocEntry::Value &A,
   llvm_unreachable("unhandled EntryKind");
 }

-/// Compare two pieces based on their offset.
+/// \brief Compare two pieces based on their offset.
 inline bool operator<(const DebugLocEntry::Value &A,
                       const DebugLocEntry::Value &B) {
   return A.getExpression().getBitPieceOffset() <
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index a71f35e..f64338e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) {
   // Then compute the bucket size, minimum of 1 bucket.
   if (num > 1024)
     Header.bucket_count = num / 4;
-  if (num > 16)
+  else if (num > 16)
     Header.bucket_count = num / 2;
   else
     Header.bucket_count = num > 0 ? num : 1;
@@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
 void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
   // Create the individual hash data outputs.
+  Data.reserve(Entries.size());
   for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
        EI != EE; ++EI) {

@@ -95,8 +96,17 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
   for (size_t i = 0, e = Data.size(); i < e; ++i) {
     uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
     Buckets[bucket].push_back(Data[i]);
-    Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+    Data[i]->Sym = Asm->createTempSymbol(Prefix);
   }
+
+  // Sort the contents of the buckets by hash value so that hash
+  // collisions end up together. Stable sort makes testing easier and
+  // doesn't cost much more.
+  for (size_t i = 0; i < Buckets.size(); ++i)
+    std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+                     [] (HashData *LHS, HashData *RHS) {
+                       return LHS->HashValue < RHS->HashValue;
+                     });
 }

 // Emits the header for the table via the AsmPrinter.
@@ -136,19 +146,32 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
       Asm->EmitInt32(index);
     else
       Asm->EmitInt32(UINT32_MAX);
-    index += Buckets[i].size();
+    // Buckets point in the list of hashes, not to the data. Do not
+    // increment the index multiple times in case of hash collisions.
+    uint64_t PrevHash = UINT64_MAX;
+    for (auto *HD : Buckets[i]) {
+      uint32_t HashValue = HD->HashValue;
+      if (PrevHash != HashValue)
+        ++index;
+      PrevHash = HashValue;
+    }
   }
 }

 // Walk through the buckets and emit the individual hashes for each
 // bucket.
 void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+  uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      uint32_t HashValue = (*HI)->HashValue;
+      if (PrevHash == HashValue)
+        continue;
       Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
-      Asm->EmitInt32((*HI)->HashValue);
+      Asm->EmitInt32(HashValue);
+      PrevHash = HashValue;
     }
   }
 }
@@ -157,11 +180,16 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
 // element in each bucket. This is done via a symbol subtraction from the
 // beginning of the section. The non-section symbol will be output later
 // when we emit the actual data.
-void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
+  uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      uint32_t HashValue = (*HI)->HashValue;
+      if (PrevHash == HashValue)
+        continue;
+      PrevHash = HashValue;
       Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
       MCContext &Context = Asm->OutStreamer.getContext();
       const MCExpr *Sub = MCBinaryExpr::CreateSub(
@@ -175,17 +203,20 @@ void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
 // Walk through the buckets and emit the full data for each element in
 // the bucket. For the string case emit the dies and the various offsets.
 // Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
-                               MCSymbol *StrSym) {
-  uint64_t PrevHash = UINT64_MAX;
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+    uint64_t PrevHash = UINT64_MAX;
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      // Terminate the previous entry if there is no hash collision
+      // with the current one.
+      if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+        Asm->EmitInt32(0);
       // Remember to emit the label for our offset.
       Asm->OutStreamer.EmitLabel((*HI)->Sym);
       Asm->OutStreamer.AddComment((*HI)->Str);
-      Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym);
+      Asm->emitSectionOffset((*HI)->Data.StrSym);
       Asm->OutStreamer.AddComment("Num DIEs");
       Asm->EmitInt32((*HI)->Data.Values.size());
       for (HashDataContents *HD : (*HI)->Data.Values) {
@@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
         Asm->EmitInt8(HD->Flags);
       }
     }
-      // Emit a 0 to terminate the data unless we have a hash collision.
-      if (PrevHash != (*HI)->HashValue)
-        Asm->EmitInt32(0);
       PrevHash = (*HI)->HashValue;
     }
+    // Emit the final end marker for the bucket.
+    if (!Buckets[i].empty())
+      Asm->EmitInt32(0);
   }
 }

 // Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
-                           MCSymbol *StrSym) {
+void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
+                           DwarfDebug *D) {
   // Emit the header.
   EmitHeader(Asm);

@@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
   EmitHashes(Asm);

   // Emit the offsets.
-  EmitOffsets(Asm, SecBegin);
+  emitOffsets(Asm, SecBegin);

   // Emit the hash data.
-  EmitData(Asm, D, StrSym);
+  EmitData(Asm, D);
 }

 #ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 74963da..e6fdf08 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -222,8 +222,8 @@ private:
   void EmitHeader(AsmPrinter *);
   void EmitBuckets(AsmPrinter *);
   void EmitHashes(AsmPrinter *);
-  void EmitOffsets(AsmPrinter *, MCSymbol *);
-  void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym);
+  void emitOffsets(AsmPrinter *, const MCSymbol *);
+  void EmitData(AsmPrinter *, DwarfDebug *D);

   // Allocator for HashData and HashDataContents.
   BumpPtrAllocator Allocator;
@@ -248,7 +248,7 @@ public:
   void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
                char Flags = 0);
   void FinalizeTable(AsmPrinter *, StringRef);
-  void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym);
+  void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
 #ifndef NDEBUG
   void print(raw_ostream &O);
   void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index f45b24c..1bee367 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -39,9 +39,24 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;

+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
+    : EHStreamer(A), shouldEmitCFI(false) {}
+
+void DwarfCFIExceptionBase::markFunctionEnd() {
+  if (shouldEmitCFI)
+    Asm->OutStreamer.EmitCFIEndProc();
+
+  if (MMI->getLandingPads().empty())
+    return;
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+}
+
 DwarfCFIException::DwarfCFIException(AsmPrinter *A)
-    : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
-      shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+    : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
+      shouldEmitLSDA(false), shouldEmitMoves(false),
+      moveTypeModule(AsmPrinter::CFI_M_None) {}

 DwarfCFIException::~DwarfCFIException() {}

@@ -72,8 +87,6 @@ void DwarfCFIException::endModule() {
   }
 }

-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
 void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;

@@ -100,7 +113,8 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   shouldEmitLSDA = shouldEmitPersonality &&
     LSDAEncoding != dwarf::DW_EH_PE_omit;

-  if (!shouldEmitPersonality && !shouldEmitMoves)
+  shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves;
+  if (!shouldEmitCFI)
     return;

   Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false);
@@ -113,43 +127,18 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
       TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
   Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);

-  MCSymbol *EHBegin =
-      Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
-  if (Asm->MAI->useAssignmentForEHBegin()) {
-    MCContext &Ctx = Asm->OutContext;
-    MCSymbol *CurPos = Ctx.CreateTempSymbol();
-    Asm->OutStreamer.EmitLabel(CurPos);
-    Asm->OutStreamer.EmitAssignment(EHBegin,
-                                    MCSymbolRefExpr::Create(CurPos, Ctx));
-  } else {
-    Asm->OutStreamer.EmitLabel(EHBegin);
-  }
-
   // Provide LSDA information.
if (!shouldEmitLSDA) return; - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + Asm->OutStreamer.EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding); } /// endFunction - Gather and emit post-function exception information. /// void DwarfCFIException::endFunction(const MachineFunction *) { - if (!shouldEmitPersonality && !shouldEmitMoves) - return; - - Asm->OutStreamer.EmitCFIEndProc(); - if (!shouldEmitPersonality) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - emitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index dcc5fe4..eee5fc5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -19,7 +19,7 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), - Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) { + Skeleton(nullptr), BaseAddress(nullptr) { insertDIE(Node, &getUnitDie()); } @@ -164,24 +164,17 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { addUInt(*Loc, dwarf::DW_FORM_udata, DD->getAddressPool().getIndex(Sym, /* TLS */ true)); } - // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); addOpAddress(*Loc, Sym); } addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - // Add the linkage name. - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) - // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: - // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and - // TAG_variable. - addString(*VariableDIE, - DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name - : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + addLinkageName(*VariableDIE, GV.getLinkageName()); } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) { addConstantValue(*VariableDIE, CI, GTy); @@ -243,7 +236,7 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, addSectionDelta(Die, Attribute, Label, Sec); } -void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { +void DwarfCompileUnit::initStmtList() { // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); @@ -255,8 +248,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. 
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, - DwarfLineSectionSym); + TLOF.getDwarfLineSection()->getBeginSymbol()); } void DwarfCompileUnit::applyStmtList(DIE &D) { @@ -285,7 +279,7 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); - attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym()); + attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( *DD->getCurrentFunction())) addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); @@ -378,13 +372,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Emit offset in .debug_range as a relocatable label. emitDIE will handle // emitting it appropriately. - auto *RangeSectionSym = DD->getRangeSectionSym(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); - RangeSpanList List( - Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()), - std::move(Range)); + RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); // Under fission, ranges are specified by constant offsets relative to the // CU's DW_AT_GNU_ranges_base. @@ -709,12 +704,14 @@ void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { } } -void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. - if (!Skeleton) + if (!Skeleton) { + LabelBegin = Asm->createTempSymbol("cu_begin"); Asm->OutStreamer.EmitLabel(LabelBegin); + } - DwarfUnit::emitHeader(ASectionSym); + DwarfUnit::emitHeader(UseOffsets); } /// addGlobalName - Add a new global name to the compile unit. diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index c66af65..9484bb6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit { /// Skeleton unit associated with this unit. DwarfCompileUnit *Skeleton; - /// A label at the start of the non-dwo section related to this unit. - MCSymbol *SectionSym; - /// The start of the unit within its section. MCSymbol *LabelBegin; @@ -76,7 +73,7 @@ public: return Skeleton; } - void initStmtList(MCSymbol *DwarfLineSectionSym); + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. void applyStmtList(DIE &D); @@ -168,22 +165,9 @@ public: /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } - MCSymbol *getSectionSym() const { + const MCSymbol *getSectionSym() const { assert(Section); - return SectionSym; - } - - /// Pass in the SectionSym even though we could recreate it in every compile - /// unit (type units will have actually distinct symbols once they're in - /// comdat sections). - void initSection(const MCSection *Section, MCSymbol *SectionSym) { - DwarfUnit::initSection(Section); - this->SectionSym = SectionSym; - - // Don't bother labeling the .dwo unit, as its offset isn't used. 
- if (!Skeleton) - LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + return Section->getBeginSymbol(); } unsigned getLength() { @@ -191,7 +175,7 @@ public: getHeaderSize() + UnitDie.getSize(); } - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; MCSymbol *getLabelBegin() const { assert(Section); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index aa1f79f..e9ebd97 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -105,6 +106,25 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int64_t Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + //===----------------------------------------------------------------------===// /// resolve - Look in the DwarfDebug map for the MDNode that @@ -169,11 +189,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0), - InfoHolder(A, *this, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), + InfoHolder(A, "info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, *this, "skel_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator), IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + IsPS4(Triple(A->getTargetTriple()).isPS4()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -182,17 +203,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) dwarf::DW_FORM_data4)), AccelTypes(TypeAtoms) { - DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr; - DwarfLineSectionSym = nullptr; - DwarfAddrSectionSym = nullptr; - DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr; - FunctionBeginSym = FunctionEndSym = nullptr; CurFn = nullptr; CurMI = nullptr; // Turn on accelerator tables for Darwin by default, pubnames by - // default for non-Darwin, and handle split dwarf. + // default for non-Darwin/PS4, and handle split dwarf. 
if (DwarfAccelTables == Default) HasDwarfAccelTables = IsDarwin; else @@ -204,7 +219,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasSplitDwarf = SplitDwarf == Enable; if (DwarfPubSections == Default) - HasDwarfPubSections = !IsDarwin; + HasDwarfPubSections = !IsDarwin && !IsPS4; else HasDwarfPubSections = DwarfPubSections == Enable; @@ -212,6 +227,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3). + // Everybody else uses GNU's. + UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3; + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); { @@ -223,19 +242,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. DwarfDebug::~DwarfDebug() { } -// Switch to the specified MCSection and emit an assembler -// temporary label to it if SymbolStem is specified. -static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = nullptr) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) - return nullptr; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -264,13 +270,6 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } -// Helper for sorting sections into a stable output order. -static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? B->getLabelBeginName() : ""); - return LA < LB; -} - // Add the various names to the Dwarf accelerator table names. // TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this @@ -388,7 +387,7 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. @@ -410,11 +409,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { dwarf::DW_FORM_data1, RVer); if (useSplitDwarf()) - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), - DwarfInfoDWOSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); else - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); CUMap.insert(std::make_pair(DIUnit, &NewCU)); CUDieMap.insert(std::make_pair(&Die, &NewCU)); @@ -445,9 +442,6 @@ void DwarfDebug::beginModule() { return; TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); - // Emit initial sections so we can reference labels later. 
- emitSectionLabels(); - SingleCU = CU_Nodes->getNumOperands() == 1; for (MDNode *N : CU_Nodes->operands()) { @@ -458,8 +452,11 @@ void DwarfDebug::beginModule() { ScopesWithImportedEntities.push_back(std::make_pair( DIImportedEntity(ImportedEntities.getElement(i)).getContext(), ImportedEntities.getElement(i))); - std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), less_first()); + // Stable sort to preserve the order of appearance of imported entities. + // This is to avoid out-of-order processing of interdependent declarations + // within the same scope, e.g. { namespace A = base; namespace B = A; } + std::stable_sort(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); @@ -541,6 +538,8 @@ void DwarfDebug::collectDeadVariables() { } void DwarfDebug::finalizeModuleInfo() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + finishSubprogramDefinitions(); finishVariableDefinitions(); @@ -570,13 +569,16 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) + if (!AddrPool.isEmpty()) { + const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, - DwarfAddrSectionSym, DwarfAddrSectionSym); - if (!SkCU->getRangeLists().empty()) + Sym, Sym); + } + if (!SkCU->getRangeLists().empty()) { + const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym, - DwarfDebugRangeSectionSym); + Sym, Sym); + } } // If we have code split among multiple sections or non-contiguous @@ -613,7 +615,7 @@ void DwarfDebug::endModule() { // If we aren't actually generating debug info (check beginModule - // conditionalized on !DisableDebugInfoPrinting and the presence of the // llvm.dbg.cu metadata node) - if (!DwarfInfoSectionSym) + if (!MMI->hasDebugInfo()) return; // Finalize the debug info for the module. @@ -621,12 +623,18 @@ void DwarfDebug::endModule() { emitDebugStr(); - // Emit all the DIEs into a debug info section. - emitDebugInfo(); + if (useSplitDwarf()) + emitDebugLocDWO(); + else + // Emit info into a debug loc section. + emitDebugLoc(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + // Emit info into a debug aranges section. if (GenerateARangeSection) emitDebugARanges(); @@ -639,12 +647,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); - emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - } else - // Emit info into a debug loc section. - emitDebugLoc(); + } // Emit info into the dwarf accelerator table sections. 
if (useDwarfAccelTables()) { @@ -828,7 +833,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, if (End != nullptr) EndLabel = getLabelAfterInsn(End); else if (std::next(I) == Ranges.end()) - EndLabel = FunctionEndSym; + EndLabel = Asm->getFunctionEnd(); else EndLabel = getLabelBeforeInsn(std::next(I)->first); assert(EndLabel && "Forgot label after instruction ending a range!"); @@ -922,11 +927,13 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); DebugLocList &LocList = DotDebugLocEntries.back(); LocList.CU = &TheCU; - LocList.Label = - Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); + LocList.Label = Asm->createTempSymbol("debug_loc"); // Build the location list for this variable. buildLocationList(LocList.List, Ranges); + // Finalize the entry by lowering it into a DWARF bytestream. + for (auto &Entry : LocList.List) + Entry.finalize(*Asm, TypeIdentifierMap); } // Collect info for variables that were optimized out. @@ -964,23 +971,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); - if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = 0; - PrevInstLoc = DL; - if (DL == PrologEndLoc) { - Flags |= DWARF2_FLAG_PROLOGUE_END; - PrologEndLoc = DebugLoc(); - Flags |= DWARF2_FLAG_IS_STMT; - } - if (DL.getLine() != - Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) - Flags |= DWARF2_FLAG_IS_STMT; - + if (DL != PrevInstLoc) { if (!DL.isUnknown()) { + unsigned Flags = 0; + PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + Flags |= DWARF2_FLAG_IS_STMT; + } + if (DL.getLine() != + Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) + Flags |= DWARF2_FLAG_IS_STMT; + const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); - } else + } else if (UnknownLocations) { + PrevInstLoc = DL; recordSourceLine(0, 0, nullptr, 0); + } } } @@ -1116,11 +1125,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - // Emit a label for the function so that we have a beginning address. - FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionBeginSym); - // Calculate history for local variables. calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), DbgValues); @@ -1131,12 +1135,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Ranges.empty()) continue; - // The first mention of a function argument gets the FunctionBeginSym + // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. DIVariable DIVar(Ranges.front().first->getDebugVariable()); if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { - LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; + LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); if (Ranges.front().first->getDebugExpression().isBitPiece()) { // Mark all non-overlapping initial pieces. 
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { @@ -1145,7 +1149,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { [&](DbgValueHistoryMap::InstrRange Pred) { return !piecesOverlap(Piece, Pred.first->getDebugExpression()); })) - LabelsBeforeInsn[I->first] = FunctionBeginSym; + LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); else break; } @@ -1160,7 +1164,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } PrevInstLoc = DebugLoc(); - PrevLabel = FunctionBeginSym; + PrevLabel = Asm->getFunctionBegin(); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); @@ -1191,11 +1195,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { return; } - // Define end label for subprogram. - FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()); - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(FunctionEndSym); - // Set DwarfDwarfCompileUnitID in MCContext to default value. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); @@ -1207,7 +1206,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. - TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym)); + TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. @@ -1290,103 +1289,10 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Emit initial Dwarf sections with a label at the start of each one. -void DwarfDebug::emitSectionLabels() { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Dwarf sections base addresses. 
- DwarfInfoSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) { - DwarfInfoDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); - DwarfTypesDWOSectionSym = emitSectionSym( - Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); - } - DwarfAbbrevSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); - if (useSplitDwarf()) - DwarfAbbrevDWOSectionSym = emitSectionSym( - Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo"); - if (GenerateARangeSection) - emitSectionSym(Asm, TLOF.getDwarfARangesSection()); - - DwarfLineSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - if (GenerateGnuPubSections) { - DwarfGnuPubNamesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); - DwarfGnuPubTypesSectionSym = - emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); - } else if (HasDwarfPubSections) { - emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); - } - - DwarfStrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); - if (useSplitDwarf()) { - DwarfStrDWOSectionSym = - emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); - DwarfAddrSectionSym = - emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc"); - } else - DwarfDebugLocSectionSym = - emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); - DwarfDebugRangeSectionSym = - emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); -} - -// Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE &Die) { - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die.getAbbrev(); - - // Emit the code (index) for the abbreviation. - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + - "] 0x" + Twine::utohexstr(Die.getOffset()) + - ":0x" + Twine::utohexstr(Die.getSize()) + " " + - dwarf::TagString(Abbrev.getTag())); - Asm->EmitULEB128(Abbrev.getNumber()); - - const SmallVectorImpl<DIEValue *> &Values = Die.getValues(); - const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); - - // Emit the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) { - dwarf::Attribute Attr = AbbrevData[i].getAttribute(); - dwarf::Form Form = AbbrevData[i].getForm(); - assert(Form && "Too many attributes for DIE (check abbreviation)"); - - if (Asm->isVerbose()) { - Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); - if (Attr == dwarf::DW_AT_accessibility) - Asm->OutStreamer.AddComment(dwarf::AccessibilityString( - cast<DIEInteger>(Values[i])->getValue())); - } - - // Emit an attribute using the defined form. - Values[i]->EmitValue(Asm, Form); - } - - // Emit the DIE children if any. - if (Abbrev.hasChildren()) { - for (auto &Child : Die.getChildren()) - emitDIE(*Child); - - Asm->OutStreamer.AddComment("End Of Children Mark"); - Asm->EmitInt8(0); - } -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - - Holder.emitUnits(DwarfAbbrevSectionSym); + Holder.emitUnits(/* UseOffsets */ false); } // Emit the abbreviation section. 
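Note: the emitSectionLabels() deletion above is the core refactoring of this commit. DWARF emission no longer makes an eager pass that switches to each debug section and plants a named temporary label ("section_info", "section_line", and so on), caching the resulting MCSymbols in DwarfDebug members; callers now ask the section itself for its start label at the point where the offset is consumed. A minimal before/after sketch, assuming only the APIs visible in this diff (emitSectionSym is the helper removed above; MCSection::getBeginSymbol() is the replacement the later hunks rely on):

    // Before: eager labeling pass, result cached in a DwarfDebug member.
    DwarfLineSectionSym =
        emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");

    // After: no cached member; fetch the begin symbol at the use site.
    const MCSymbol *LineBegin =
        TLOF.getDwarfLineSection()->getBeginSymbol();

This is also why emitUnits() above shrinks to a bool: the only information the old abbrev-section symbol parameter carried was "emit a relocatable offset or a literal zero", which /* UseOffsets */ expresses directly.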
@@ -1396,65 +1302,39 @@ void DwarfDebug::emitAbbreviations() { Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -// Emit the last address of the section and the end of the line matrix. -void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { - // Define last address of section. - Asm->OutStreamer.AddComment("Extended Op"); - Asm->EmitInt8(0); - - Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); - Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); - - Asm->OutStreamer.AddComment("Section end label"); - - Asm->OutStreamer.EmitSymbolValue( - Asm->GetTempSymbol("section_end", SectionEnd), - Asm->getDataLayout().getPointerSize()); - - // Mark end of matrix. - Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); - Asm->EmitInt8(0); - Asm->EmitInt8(1); - Asm->EmitInt8(1); -} - void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName) { + StringRef TableName) { Accel.FinalizeTable(Asm, TableName); Asm->OutStreamer.SwitchSection(Section); - auto *SectionBegin = Asm->GetTempSymbol(SymName); - Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym); + Accel.emit(Asm, Section->getBeginSymbol(), this); } // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), - "Names", "names_begin"); + "Names"); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), - "ObjC", "objc_begin"); + "ObjC"); } // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { emitAccel(AccelNamespace, Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), - "namespac", "namespac_begin"); + "namespac"); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), - "types", "types_begin"); + "types"); } // Public name handling. @@ -1537,15 +1417,14 @@ void DwarfDebug::emitDebugPubSection( if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; - unsigned ID = TheU->getUniqueID(); // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection(PSec); // Emit the header. Asm->OutStreamer.AddComment("Length of Public " + Name + " Info"); - MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID); - MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); Asm->OutStreamer.EmitLabel(BeginLabel); @@ -1554,7 +1433,7 @@ void DwarfDebug::emitDebugPubSection( Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); + Asm->emitSectionOffset(TheU->getLabelBegin()); Asm->OutStreamer.AddComment("Compilation Unit Length"); Asm->EmitInt32(TheU->getLength()); @@ -1600,62 +1479,27 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -/// Emits an optimal (=sorted) sequence of DW_OP_pieces. 
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, - const DITypeIdentifierMap &Map, - ArrayRef<DebugLocEntry::Value> Values) { - assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { - return P.isBitPiece(); - }) && "all values are expected to be pieces"); - assert(std::is_sorted(Values.begin(), Values.end()) && - "pieces are expected to be sorted"); - - unsigned Offset = 0; - for (auto Piece : Values) { - DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getBitPieceOffset(); - unsigned PieceSize = Expr.getBitPieceSize(); - assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); - if (Offset < PieceOffset) { - // The DWARF spec seriously mandates pieces with no locations for gaps. - Asm->EmitDwarfOpPiece(Streamer, PieceOffset-Offset); - Offset += PieceOffset-Offset; - } - Offset += PieceSize; - -#ifndef NDEBUG - DIVariable Var = Piece.getVariable(); - unsigned VarSize = Var.getSizeInBits(Map); - assert(PieceSize+PieceOffset <= VarSize - && "piece is larger than or outside of variable"); - assert(PieceSize != VarSize - && "piece covers entire variable"); -#endif - emitDebugLocValue(Streamer, Piece, PieceOffset); - } -} - void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry) { - const DebugLocEntry::Value Value = Entry.getValues()[0]; - if (Value.isBitPiece()) - // Emit all pieces that belong to the same variable and range. - return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); - - assert(Entry.getValues().size() == 1 && "only pieces may have >1 value"); - emitDebugLocValue(Streamer, Value); + auto Comment = Entry.getComments().begin(); + auto End = Entry.getComments().end(); + for (uint8_t Byte : Entry.getDWARFBytes()) + Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); } -void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, - const DebugLocEntry::Value &Value, - unsigned PieceOffsetInBits) { +static void emitDebugLocValue(const AsmPrinter &AP, + const DITypeIdentifierMap &TypeIdentifierMap, + ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { DIVariable DV = Value.getVariable(); - DebugLocDwarfExpression DwarfExpr(*Asm, Streamer); - + DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); // Regular entry. if (Value.isInt()) { - DIBasicType BTy(resolve(DV.getType())); + DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap)); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); @@ -1666,7 +1510,7 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, DIExpression Expr = Value.getExpression(); if (!Expr || (Expr.getNumElements() == 0)) // Regular entry. - Asm->EmitDwarfRegOp(Streamer, Loc); + AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. if (Loc.getOffset()) { @@ -1682,6 +1526,52 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, // FIXME: ^ } + +void DebugLocEntry::finalize(const AsmPrinter &AP, + const DITypeIdentifierMap &TypeIdentifierMap) { + BufferByteStreamer Streamer(DWARFBytes, Comments); + const DebugLocEntry::Value Value = Values[0]; + if (Value.isBitPiece()) { + // Emit all pieces that belong to the same variable and range. 
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isBitPiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + DIExpression Expr = Piece.getExpression(); + unsigned PieceOffset = Expr.getBitPieceOffset(); + unsigned PieceSize = Expr.getBitPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); + Expr.AddOpPiece(PieceOffset-Offset, 0); + Offset += PieceOffset-Offset; + } + Offset += PieceSize; + +#ifndef NDEBUG + DIVariable Var = Piece.getVariable(); + unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap); + assert(PieceSize+PieceOffset <= VarSize + && "piece is larger than or outside of variable"); + assert(PieceSize != VarSize + && "piece covers entire variable"); +#endif + emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset); + } + } else { + assert(Values.size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Value, 0); + } +} + + void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) { Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -1752,10 +1642,7 @@ struct ArangeSpan { // address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Provides a unique id per text section. - DenseMap<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap; - - // Prime section data. - SectionMap[Asm->getObjFileLowering().getTextSection()]; + MapVector<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap; // Filter labels by section. for (const SymbolCU &SCU : ArangeLabels) { @@ -1772,31 +1659,13 @@ void DwarfDebug::emitDebugARanges() { } } - // Build a list of sections used. - std::vector<const MCSection *> Sections; - for (const auto &it : SectionMap) { - const MCSection *Section = it.first; - Sections.push_back(Section); - } - - // Sort the sections into order. - // This is only done to ensure consistent output order across different runs. - std::sort(Sections.begin(), Sections.end(), SectionSort); - // Add terminating symbols for each section. - for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) { - const MCSection *Section = Sections[ID]; + for (const auto &I : SectionMap) { + const MCSection *Section = I.first; MCSymbol *Sym = nullptr; - if (Section) { - // We can't call MCSection::getLabelEndName, as it's only safe to do so - // if we know the section name up-front. For user-created sections, the - // resulting label may not be valid to use as a label. (section names can - // use a greater set of characters on some systems) - Sym = Asm->GetTempSymbol("debug_end", ID); - Asm->OutStreamer.SwitchSection(Section); - Asm->OutStreamer.EmitLabel(Sym); - } + if (Section) + Sym = Asm->OutStreamer.endSection(Section); // Insert a final terminator. 
SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); @@ -1804,8 +1673,9 @@ void DwarfDebug::emitDebugARanges() { DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; - for (const MCSection *Section : Sections) { - SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + for (auto &I : SectionMap) { + const MCSection *Section = I.first; + SmallVector<SymbolCU, 8> &List = I.second; if (List.size() < 2) continue; @@ -1902,7 +1772,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer.AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym()); + Asm->emitSectionOffset(CU->getLabelBegin()); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); Asm->OutStreamer.AddComment("Segment Size (in bytes)"); @@ -1998,10 +1868,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = make_unique<DwarfCompileUnit>( CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); DwarfCompileUnit &NewCU = *OwnedUnit; - NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); - NewCU.initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(); initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); @@ -2012,9 +1881,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); - // Don't pass an abbrev symbol, using a constant zero instead so as not to - // emit relocations into the dwo file. - InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr); + // Don't emit relocations into the dwo file. + InfoHolder.emitUnits(/* UseOffsets */ true); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the @@ -2058,7 +1926,7 @@ static uint64_t makeTypeSignature(StringRef Identifier) { // appropriately. MD5::MD5Result Result; Hash.final(Result); - return *reinterpret_cast<support::ulittle64_t *>(Result + 8); + return support::endian::read64le(Result + 8); } void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 1c0e163..74db3ef 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -88,7 +88,8 @@ public: : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr), DD(DD) { FrameIndex.push_back(FI); - assert(Var.Verify() && E.Verify()); + assert(Var.Verify()); + assert(!E || E->isValid()); } /// Construct a DbgVariable from a DEBUG_VALUE. @@ -243,25 +244,10 @@ class DwarfDebug : public AsmPrinterHandler { // If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; - // Section Symbols: these are assembler temporary labels that are emitted at - // the beginning of each supported dwarf section. These are used to form - // section offsets and are created by EmitSectionLabels. 
- MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; - MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; - MCSymbol *FunctionBeginSym, *FunctionEndSym; - MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; - MCSymbol *DwarfTypesDWOSectionSym; - MCSymbol *DwarfStrDWOSectionSym; - MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; - // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Counter for assigning globally unique IDs for ranges. - unsigned GlobalRangeCount; - // Holder for the file specific debug information. DwarfFile InfoHolder; @@ -290,6 +276,9 @@ class DwarfDebug : public AsmPrinterHandler { // text. bool UsedNonDefaultText; + // Whether to use the GNU TLS opcode (instead of the standard opcode). + bool UseGNUTLSOpcode; + // Version of dwarf we're emitting. unsigned DwarfVersion; @@ -318,6 +307,7 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; + bool IsPS4; AddressPool AddrPool; @@ -347,9 +337,6 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Emit initial Dwarf sections with a label at the start of each one. - void emitSectionLabels(); - /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -373,13 +360,9 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Emit the abbreviation section. void emitAbbreviations(); - /// \brief Emit the last address of the section and the end of - /// the line matrix. - void emitEndOfLineMatrix(unsigned SectionEnd); - /// \brief Emit a specified accelerator table. void emitAccel(DwarfAccelTable &Accel, const MCSection *Section, - StringRef TableName, StringRef SymName); + StringRef TableName); /// \brief Emit visible names into a hashed accelerator table section. void emitAccelNames(); @@ -540,8 +523,9 @@ public: SymSize[Sym] = Size; } - /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE &Die); + /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// standard DW_OP_form_tls_address opcode + bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } // Experimental DWARF5 features. @@ -556,15 +540,6 @@ public: /// Returns the Dwarf Version. unsigned getDwarfVersion() const { return DwarfVersion; } - /// Returns the section symbol for the .debug_loc section. - MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } - - /// Returns the section symbol for the .debug_str section. - MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; } - - /// Returns the section symbol for the .debug_ranges section. - MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; } - /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } @@ -577,7 +552,8 @@ public: /// \brief Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. 
- void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); + void emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocEntry &Entry); /// \brief emit a single value for the debug loc section. void emitDebugLocValue(ByteStreamer &Streamer, const DebugLocEntry::Value &Value, @@ -621,8 +597,6 @@ public: void addAccelType(StringRef Name, const DIE &Die, char Flags); const MachineFunction *getCurrentFunction() const { return CurFn; } - const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; } - const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; } iterator_range<ImportedEntityMap::const_iterator> findImportedEntitiesForScope(const MDNode *Scope) const { @@ -642,12 +616,6 @@ public: /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - // FIXME: Consider rolling ranges up into DwarfDebug since we use a single - // range_base anyway, so there's no need to keep them as separate per-CU range - // lists. (though one day we might end up with a range.dwo section, in which - // case it'd go to DwarfFile) - unsigned getNextRangeNumber() { return GlobalRangeCount++; } - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index e8867c0..6eaf707 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -21,17 +21,24 @@ namespace llvm { class MachineFunction; class ARMTargetStreamer; -class DwarfCFIException : public EHStreamer { - /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality - /// should be emitted. +class DwarfCFIExceptionBase : public EHStreamer { +protected: + DwarfCFIExceptionBase(AsmPrinter *A); + + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI; + + void markFunctionEnd() override; +}; + +class DwarfCFIException : public DwarfCFIExceptionBase { + /// Per-function flag to indicate if .cfi_personality should be emitted. bool shouldEmitPersonality; - /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda - /// should be emitted. + /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA; - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. + /// Per-function flag to indicate if frame moves info should be emitted. bool shouldEmitMoves; AsmPrinter::CFIMoveType moveTypeModule; @@ -43,26 +50,21 @@ public: DwarfCFIException(AsmPrinter *A); virtual ~DwarfCFIException(); - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. 
void endFunction(const MachineFunction *) override; }; -class ARMException : public EHStreamer { +class ARMException : public DwarfCFIExceptionBase { void emitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); - /// shouldEmitCFI - Per-function flag to indicate if frame CFI info - /// should be emitted. - bool shouldEmitCFI; - public: //===--------------------------------------------------------------------===// // Main entry points. @@ -70,15 +72,14 @@ public: ARMException(AsmPrinter *A); virtual ~ARMException(); - /// endModule - Emit all exception information that should come after the - /// content. + /// Emit all exception information that should come after the content. void endModule() override; - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. void beginFunction(const MachineFunction *MF) override; - /// endFunction - Gather and emit post-function exception information. + /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index fcab067..489e455 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -22,14 +22,6 @@ using namespace llvm; -const TargetRegisterInfo *DwarfExpression::getTRI() const { - return AP.TM.getSubtargetImpl()->getRegisterInfo(); -} - -unsigned DwarfExpression::getDwarfVersion() const { - return AP.getDwarfDebug()->getDwarfVersion(); -} - void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); if (DwarfReg < 32) { @@ -74,28 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) { } bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { - int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false); - if (DwarfReg < 0) - return false; - if (isFrameRegister(MachineReg)) { // If variable offset is based in frame register then use fbreg. EmitOp(dwarf::DW_OP_fbreg); EmitSigned(Offset); - } else { - AddRegIndirect(DwarfReg, Offset); + return true; } + + int DwarfReg = TRI.getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + AddRegIndirect(DwarfReg, Offset); return true; } bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) { - const TargetRegisterInfo *TRI = getTRI(); - if (!TRI->isPhysicalRegister(MachineReg)) + if (!TRI.isPhysicalRegister(MachineReg)) return false; - int Reg = TRI->getDwarfRegNum(MachineReg, false); + int Reg = TRI.getDwarfRegNum(MachineReg, false); // If this is a valid register number, emit it. if (Reg >= 0) { @@ -107,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // Walk up the super-register chain until we find a valid number. // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. 
- for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + Reg = TRI.getDwarfRegNum(*SR, false); if (Reg >= 0) { - unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); + unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); AddReg(Reg, "super-register"); if (PieceOffsetInBits == RegOffset) { AddOpPiece(Size, RegOffset); @@ -136,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, // efficient DW_OP_piece. unsigned CurPos = PieceOffsetInBits; // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8; + unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. SmallBitVector Coverage(RegSize, false); - for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { - unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - Reg = TRI->getDwarfRegNum(*SR, false); + for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned Offset = TRI.getSubRegIdxOffset(Idx); + Reg = TRI.getDwarfRegNum(*SR, false); // Intersection between the bits we already emitted and the bits // covered by this subregister. @@ -180,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) { // value, so the producers and consumers started to rely on heuristics // to disambiguate the value vs. location status of the expression. // See PR21176 for more details. - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -188,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) { EmitOp(dwarf::DW_OP_constu); EmitUnsigned(Value); // cf. comment in DwarfExpression::AddSignedConstant(). - if (getDwarfVersion() >= 4) + if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } @@ -204,11 +196,12 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { auto I = Expr.begin(); - // Pattern-match combinations for which more efficient representations exist - // first. - if (I == Expr.end()) + auto E = Expr.end(); + if (I == E) return AddMachineRegPiece(MachineReg); + // Pattern-match combinations for which more efficient representations exist + // first. bool ValidReg = false; switch (*I) { case dwarf::DW_OP_bit_piece: { @@ -218,20 +211,23 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, return AddMachineRegPiece(MachineReg, SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } - case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus: { // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. 
- if (I->getNext() == dwarf::DW_OP_deref) { + auto N = I->getNext(); + if ((N != E) && (*N == dwarf::DW_OP_deref)) { unsigned Offset = I->getArg(1); ValidReg = AddMachineRegIndirect(MachineReg, Offset); std::advance(I, 2); break; } else ValidReg = AddMachineRegPiece(MachineReg); - case dwarf::DW_OP_deref: - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - ValidReg = AddMachineRegIndirect(MachineReg); - ++I; - break; + } + case dwarf::DW_OP_deref: { + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. + ValidReg = AddMachineRegIndirect(MachineReg); + ++I; + break; + } default: llvm_unreachable("unsupported operand"); } @@ -240,7 +236,7 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, return false; // Emit remaining elements of the expression. - AddExpression(I, Expr.end(), PieceOffsetInBits); + AddExpression(I, E, PieceOffsetInBits); return true; } diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index b90b7b6..985d52c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -30,21 +30,22 @@ class DIELoc; /// entry. class DwarfExpression { protected: - const AsmPrinter &AP; // Various convenience accessors that extract things out of AsmPrinter. - const TargetRegisterInfo *getTRI() const; - unsigned getDwarfVersion() const; + const TargetRegisterInfo &TRI; + unsigned DwarfVersion; public: - DwarfExpression(const AsmPrinter &AP) : AP(AP) {} + DwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion) + : TRI(TRI), DwarfVersion(DwarfVersion) {} virtual ~DwarfExpression() {} /// Output a dwarf operand and an optional assembler comment. virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; /// Emit a raw signed value. - virtual void EmitSigned(int Value) = 0; + virtual void EmitSigned(int64_t Value) = 0; /// Emit a raw unsigned value. - virtual void EmitUnsigned(unsigned Value) = 0; + virtual void EmitUnsigned(uint64_t Value) = 0; /// Return whether the given machine register is the frame register in the /// current function. virtual bool isFrameRegister(unsigned MachineReg) = 0; @@ -105,27 +106,27 @@ class DebugLocDwarfExpression : public DwarfExpression { ByteStreamer &BS; public: - DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS) - : DwarfExpression(AP), BS(BS) {} + DebugLocDwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion, ByteStreamer &BS) + : DwarfExpression(TRI, DwarfVersion), BS(BS) {} void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; /// DwarfExpression implementation for singular DW_AT_location. 
class DIEDwarfExpression : public DwarfExpression { + const AsmPrinter &AP; DwarfUnit &DU; DIELoc &DIE; public: - DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(AP), DU(DU), DIE(DIE) {} - + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); void EmitOp(uint8_t Op, const char *Comment = nullptr) override; - void EmitSigned(int Value) override; - void EmitUnsigned(unsigned Value) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; bool isFrameRegister(unsigned MachineReg) override; }; } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 3988f0d..60acc58e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -17,9 +17,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA) - : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {} +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) : Asm(AP), StrPool(DA, *Asm, Pref) {} DwarfFile::~DwarfFile() {} @@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfFile::emitUnits(const MCSymbol *ASectionSym) { +void DwarfFile::emitUnits(bool UseOffsets) { for (const auto &TheU : CUs) { DIE &Die = TheU->getUnitDie(); const MCSection *USection = TheU->getSection(); Asm->OutStreamer.SwitchSection(USection); - TheU->emitHeader(ASectionSym); + TheU->emitHeader(UseOffsets); - DD.emitDIE(Die); + Asm->emitDwarfDIE(Die); } } @@ -120,23 +119,13 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { Die.setSize(Offset - Die.getOffset()); return Offset; } + void DwarfFile::emitAbbrevs(const MCSection *Section) { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. Asm->OutStreamer.SwitchSection(Section); - - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbreviations) { - // Emit the abbrevations code (base 1 index.) - Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); - - // Emit the abbreviations data. - Abbrev->Emit(Asm); - } - - // Mark end of abbreviations. - Asm->EmitULEB128(0, "EOM(3)"); + Asm->emitDwarfAbbrevs(Abbreviations); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 35bf33a..c9de666 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -37,8 +37,6 @@ class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; - DwarfDebug &DD; - // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -62,8 +60,7 @@ class DwarfFile { DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; public: - DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, - BumpPtrAllocator &DA); + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); ~DwarfFile(); @@ -83,7 +80,7 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(const MCSymbol *ASectionSym); + void emitUnits(bool UseOffsets); /// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(const MCSection *); diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index d76b66c..165ef16 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -19,7 +19,7 @@ getEntry(AsmPrinter &Asm, std::pair<MCSymbol *, unsigned> &Entry = Pool[Str]; if (!Entry.first) { Entry.second = Pool.size() - 1; - Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); + Entry.first = Asm.createTempSymbol(Prefix); } return Entry; } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index b0c7d48..f6af73f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -17,6 +17,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,17 +44,23 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, cl::desc("Generate DWARF4 type units."), cl::init(false)); +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, + DIELoc &DIE) + : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion()), + AP(AP), DU(DU), DIE(DIE) {} + void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); } -void DIEDwarfExpression::EmitSigned(int Value) { +void DIEDwarfExpression::EmitSigned(int64_t Value) { DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); } -void DIEDwarfExpression::EmitUnsigned(unsigned Value) { +void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { - return MachineReg == getTRI()->getFrameRegister(*AP.MF); + return MachineReg == TRI.getFrameRegister(*AP.MF); } @@ -257,12 +264,14 @@ void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute, /// to be in the local string pool instead of indirected. void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String); DIEValue *Value; if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) Value = new (DIEValueAllocator) DIELabel(Symb); else - Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym()); + Value = new (DIEValueAllocator) + DIEDelta(Symb, TLOF.getDwarfStrSection()->getBeginSymbol()); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); Die.addValue(Attribute, dwarf::DW_FORM_strp, Str); } @@ -750,6 +759,15 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { addBlock(Die, dwarf::DW_AT_const_value, Block); } +// Add a linkage name to the DIE. +void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { + if (!LinkageName.empty()) + addString(Die, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); +} + /// addTemplateParams - Add template parameters into buffer. void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. 
@@ -1269,9 +1287,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); - if (!LinkageName.empty() && DeclLinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + if (DeclLinkageName.empty()) + addLinkageName(SPDie, LinkageName); if (!DeclDie) return false; @@ -1344,9 +1361,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, if (SP.isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); - if (unsigned isa = Asm->getISAEncoding(SP.getFunction())) { + if (unsigned isa = Asm->getISAEncoding()) addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - } if (SP.isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); @@ -1597,7 +1613,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { return &StaticMemberDIE; } -void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { +void DwarfUnit::emitHeader(bool UseOffsets) { // Emit size of content not including length itself Asm->OutStreamer.AddComment("Length of Unit"); Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); @@ -1605,14 +1621,16 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + // We share one abbreviations table across all units so it's always at the // start of the section. Use a relocatable offset where needed to ensure // linking doesn't invalidate that offset. - if (ASectionSym) - Asm->EmitSectionOffset(ASectionSym, ASectionSym); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (!UseOffsets) + Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol()); else - // Use a constant value when no symbol is provided. Asm->EmitInt32(0); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } @@ -1622,8 +1640,8 @@ void DwarfUnit::initSection(const MCSection *Section) { this->Section = Section; } -void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { - DwarfUnit::emitHeader(ASectionSym); +void DwarfTypeUnit::emitHeader(bool UseOffsets) { + DwarfUnit::emitHeader(UseOffsets); Asm->OutStreamer.AddComment("Type Signature"); Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature)); Asm->OutStreamer.AddComment("Type DIE Offset"); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 7a5e47d..81c5821 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -120,7 +120,6 @@ protected: DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - void initSection(const MCSection *Section); /// Add a string attribute data and value. void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); @@ -132,6 +131,8 @@ protected: public: virtual ~DwarfUnit(); + void initSection(const MCSection *Section); + const MCSection *getSection() const { assert(Section); return Section; @@ -251,6 +252,9 @@ public: void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); + /// \brief Add a linkage name, if it isn't empty. 
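The emitHeader change further down switches the abbreviation offset between a section-begin symbol and a literal 0, but the field order is unchanged. A sketch of the 32-bit DWARF compile-unit header being emitted (little-endian byte order is assumed here purely for illustration; real emission is target-dependent and may use a relocation for the abbrev offset):

    #include <cstdint>
    #include <vector>

    static void emitUnitHeader(std::vector<uint8_t> &Out, uint32_t ContentSize,
                               uint16_t DwarfVersion, uint32_t AbbrevOffset,
                               uint8_t AddrSize) {
      auto emit32 = [&Out](uint32_t V) {
        for (int i = 0; i < 4; ++i)
          Out.push_back(uint8_t((V >> (8 * i)) & 0xff));
      };
      emit32(ContentSize);                          // length, excluding this field
      Out.push_back(uint8_t(DwarfVersion & 0xff));  // DWARF version number
      Out.push_back(uint8_t(DwarfVersion >> 8));
      emit32(AbbrevOffset);                         // offset into .debug_abbrev;
                                                    // a shared table makes this 0
      Out.push_back(AddrSize);                      // pointer size in bytes
    }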
+ void addLinkageName(DIE &Die, StringRef LinkageName); + /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); @@ -321,7 +325,7 @@ public: } /// Emit the header for this unit, not including the initial length field. - virtual void emitHeader(const MCSymbol *ASectionSym) const; + virtual void emitHeader(bool UseOffsets); virtual DwarfCompileUnit &getCU() = 0; @@ -423,12 +427,11 @@ public: void setType(const DIE *Ty) { this->Ty = Ty; } /// Emit the header for this unit, not including the initial length field. - void emitHeader(const MCSymbol *ASectionSym) const override; + void emitHeader(bool UseOffsets) override; unsigned getHeaderSize() const override { return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature sizeof(uint32_t); // Type DIE Offset } - using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } }; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 4841814..14df4c9 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -436,12 +436,7 @@ void EHStreamer::emitExceptionTable() { Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); Asm->OutStreamer.EmitLabel(GCCETSym); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber())); - - if (IsSJLJ) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_", - Asm->getFunctionNumber())); + Asm->OutStreamer.EmitLabel(Asm->getCurExceptionSym()); // Emit the LSDA header. Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); @@ -552,16 +547,14 @@ void EHStreamer::emitExceptionTable() { I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); MCSymbol *BeginLabel = S.BeginLabel; if (!BeginLabel) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; if (!EndLabel) - EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - + EndLabel = Asm->getFunctionEnd(); // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to @@ -689,19 +682,3 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { Asm->EmitULEB128(TypeID); } } - -/// Emit all exception information that should come after the content. -void EHStreamer::endModule() { - llvm_unreachable("Should be implemented"); -} - -/// Gather pre-function exception information. Assumes it's being emitted -/// immediately after the function entry point. -void EHStreamer::beginFunction(const MachineFunction *MF) { - llvm_unreachable("Should be implemented"); -} - -/// Gather and emit post-function exception information. -void EHStreamer::endFunction(const MachineFunction *) { - llvm_unreachable("Should be implemented"); -} diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 9b316ff..94d0585 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -125,16 +125,6 @@ public: EHStreamer(AsmPrinter *A); virtual ~EHStreamer(); - /// Emit all exception information that should come after the content. - void endModule() override; - - /// Gather pre-function exception information. Assumes being emitted - /// immediately after the function entry point. 
- void beginFunction(const MachineFunction *MF) override; - - /// Gather and emit post-function exception information. - void endFunction(const MachineFunction *) override; - // Unused. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} void beginInstruction(const MachineInstr *MI) override {} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 2b03877..7d76ead 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -48,8 +48,6 @@ Win64Exception::~Win64Exception() {} void Win64Exception::endModule() { } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; @@ -80,9 +78,6 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); } /// endFunction - Gather and emit post-function exception information. @@ -91,9 +86,6 @@ void Win64Exception::endFunction(const MachineFunction *) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); @@ -170,10 +162,8 @@ void Win64Exception::emitCSpecificHandlerTable() { SmallVector<CallSiteEntry, 64> CallSites; computeCallSiteTable(CallSites, LandingPads, FirstActions); - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - MCSymbol *EHFuncEndSym = - Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); // Emit the number of table entries. unsigned NumEntries = 0; diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index b5e0929..d2b4eec 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -190,8 +190,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { return; assert(FI.End && "Don't know where the function ends?"); - StringRef FuncName = getDISubprogram(GV).getDisplayName(), - GVName = GV->getName(); + StringRef GVName = GV->getName(); + StringRef FuncName; + if (DISubprogram SP = getDISubprogram(GV)) + FuncName = SP.getDisplayName(); + // FIXME Clang currently sets DisplayName to "bar" for a C++ // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying // to demangle display names anyways, so let's just put a mangled name into @@ -364,10 +367,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { FnDebugInfo.erase(GV); VisitedFunctions.pop_back(); } else { - // Define end label for subprogram. 
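The WinCodeViewLineTables hunk above stops dereferencing getDISubprogram(GV) unconditionally: a function without debug info now falls back to its mangled name instead of reading from an invalid descriptor. A standalone sketch of the guard (DebugInfo is a hypothetical stand-in for the DISubprogram descriptor, which converts to bool):

    #include <string>

    struct DebugInfo {
      bool Valid = false;
      std::string DisplayName;
      explicit operator bool() const { return Valid; }
    };

    // Only read DisplayName when the descriptor lookup actually succeeded;
    // otherwise use the mangled symbol name.
    static std::string functionName(const DebugInfo &SP,
                                    const std::string &MangledName) {
      if (SP)
        return SP.DisplayName;
      return MangledName;
    }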
- MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(FunctionEndSym); - CurFn->End = FunctionEndSym; + CurFn->End = Asm->getFunctionEnd(); } CurFn = nullptr; } diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 4b64be0..fa17108 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -48,7 +48,7 @@ namespace { bool expandAtomicLoadToLL(LoadInst *LI); bool expandAtomicLoadToCmpXchg(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); - bool expandAtomicRMW(AtomicRMWInst *AI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -135,9 +135,12 @@ bool AtomicExpand::runOnFunction(Function &F) { // - into a load if it is idempotent // - into a Cmpxchg/LL-SC loop otherwise // we try them in that order. - MadeChange |= - (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) || - (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI)); + + if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { + MadeChange = true; + } else { + MadeChange |= tryExpandAtomicRMW(RMWI); + } } else if (CASI && TLI->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } @@ -211,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // atomic if implemented as a native store. So we replace them by an // atomic swap, that can be implemented for example as a ldrex/strex on ARM // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. - // It is the responsibility of the target to only return true in + // It is the responsibility of the target to only signal expansion via // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = @@ -220,14 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. 
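The AtomicExpand::runOnFunction hunk above replaces a chained boolean expression with explicit control flow: the cheap idempotent-RMW rewrite is tried first, and only if it does not fire does the general expansion run. A minimal sketch of that ordering (the callbacks stand in for the pass's simplifyIdempotentRMW and tryExpandAtomicRMW helpers):

    #include <functional>

    static bool processRMW(bool IsIdempotent,
                           const std::function<bool()> &SimplifyIdempotent,
                           const std::function<bool()> &TryExpand) {
      // A successful simplification replaces the RMW with a plain load, so
      // there is nothing left to expand.
      if (IsIdempotent && SimplifyIdempotent())
        return true;
      // The expansion may legitimately decide to change nothing (None kind).
      return TryExpand();
    }

The tryExpandAtomicRMW switch in the next hunk then turns the target's single AtomicRMWExpansionKind answer into exactly one lowering strategy.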
- return expandAtomicRMW(AI); + return tryExpandAtomicRMW(AI); } -bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { - if (TLI->hasLoadLinkedStoreConditional()) +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicRMWExpansionKind::None: + return false; + case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { + assert(TLI->hasLoadLinkedStoreConditional() && + "TargetLowering requested we expand AtomicRMW instruction into " + "load-linked/store-conditional combos, but such instructions aren't " + "supported"); + return expandAtomicRMWToLLSC(AI); - else + } + case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { return expandAtomicRMWToCmpXchg(AI); + } + } + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } /// Emit IR to implement the given atomicrmw operation on values in registers, diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index b8f05cd..abe7ca1 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -727,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { + auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); + auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + +static void +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. + MachineBasicBlock *MBB = MBBIStartPos->getParent(); + // Note CommonTailLen does not necessarily match the size of + // the common BB nor all its instructions because of debug + // instruction differences. + unsigned CommonTailLen = 0; + for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos) + ++CommonTailLen; + + MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); + MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); + MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); + MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); + + while (CommonTailLen--) { + assert(MBBI != MBBIE && "Reached BB end within common tail length!"); + (void)MBBIE; + + if (MBBI->isDebugValue()) { + ++MBBI; + continue; + } + + while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + ++MBBICommon; + + assert(MBBICommon != MBBIECommon && + "Reached BB end within common tail length!"); + assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); + + if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->clearMemRefs(); + + ++MBBI; + ++MBBICommon; + } +} + // See if any of the blocks in MergePotentials (which all have a common single // successor, or all have no successor) can be tail-merged.
If there is a // successor, any blocks in MergePotentials that are not tail-merged and @@ -761,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + array_pod_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { @@ -840,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index f21d4d2..ef57638 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -19,7 +19,6 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp - ForwardControlFlowIntegrity.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCRootLowering.cpp @@ -29,7 +28,6 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp - JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp LexicalScopes.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 7c0068e..da66639 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -24,9 +24,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); - initializeExpandPostRAPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); @@ -36,31 +37,34 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); + initializeLowerIntrinsicsPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); - initializeMachineCopyPropagationPass(Registry); - initializeMachineCombinerPass(Registry); initializeMachineCSEPass(Registry); + initializeMachineCombinerPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineDominatorTreePass(Registry); - initializeMachinePostDominatorTreePass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachinePostDominatorTreePass(Registry); initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); + initializePEIPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); 
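The removeMMOsFromMemoryOperations call above relies on hasIdenticalMMOs: two memory-operand lists count as identical only when their lengths match and every pair of elements compares equal, and on any mismatch the merged instruction's MMOs are cleared so later passes compute dependencies conservatively. The same shape in standalone form (plain ints stand in for MachineMemOperands):

    #include <algorithm>
    #include <vector>

    static bool identicalLists(const std::vector<int> &A,
                               const std::vector<int> &B) {
      // Length check first; std::equal alone would read past a shorter B.
      return A.size() == B.size() &&
             std::equal(A.begin(), A.end(), B.begin());
    }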
initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); - initializePEIPass(Registry); initializeRegisterCoalescerPass(Registry); initializeSlotIndexesPass(Registry); - initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); + initializeStackMapLivenessPass(Registry); + initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); @@ -70,9 +74,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); - initializeLowerIntrinsicsPass(Registry); - initializeMachineFunctionPrinterPassPass(Registry); - initializeStackMapLivenessPass(Registry); + initializeWinEHPreparePass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c0d7dca..6c9d048 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -124,7 +124,6 @@ class TypePromotionTransaction; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - DominatorTree *DT; /// CurInstIterator - As we scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should @@ -142,8 +141,7 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to - /// be updated. + /// ModifiedDT - If CFG is modified in anyway. bool ModifiedDT; /// OptSize - True if optimizing for size. @@ -186,7 +184,7 @@ class TypePromotionTransaction; bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInst); + unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); }; @@ -214,9 +212,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be @@ -255,7 +250,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed - ModifiedDT |= ModifiedDTOnIteration; if (ModifiedDTOnIteration) break; } @@ -298,8 +292,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (EverMadeChange || MadeChange) MadeChange |= EliminateFallThrough(F); - if (MadeChange) - ModifiedDT = true; EverMadeChange |= MadeChange; } @@ -313,9 +305,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { EverMadeChange |= simplifyOffsetableRelocate(*I); } - if (ModifiedDT && DT) - DT->recalculate(F); - return EverMadeChange; } @@ -341,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // Remember if SinglePred was the entry block of the function. // If so, we will need to move BB back to the entry position. 
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(BB, DT); + MergeBasicBlockIntoOnlyPred(BB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -481,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, DT); + MergeBasicBlockIntoOnlyPred(DestBB, nullptr); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -523,13 +512,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT && !ModifiedDT) { - BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); - BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); - BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); - DT->changeImmediateDominator(DestBB, NewIDom); - DT->eraseNode(BB); - } BB->eraseFromParent(); ++NumBlocksElim; @@ -561,12 +543,15 @@ static void computeBaseDerivedRelocateMap( IntrinsicInst *I = Item.second; auto BaseKey = std::make_pair(Key.first, Key.first); - IntrinsicInst *Base = RelocateIdxMap[BaseKey]; - if (!Base) + + // We're iterating over RelocateIdxMap so we cannot modify it. + auto MaybeBase = RelocateIdxMap.find(BaseKey); + if (MaybeBase == RelocateIdxMap.end()) // TODO: We might want to insert a new base object relocate and gep off // that, if there are enough derived object relocates. continue; - RelocateInstMap[Base].push_back(I); + + RelocateInstMap[MaybeBase->second].push_back(I); } } @@ -615,8 +600,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, // Create a Builder and replace the target callsite with a gep IRBuilder<> Builder(ToReplace); Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); - Value *Replacement = - Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV)); + Value *Replacement = Builder.CreateGEP( + Derived->getSourceElementType(), RelocatedBase, makeArrayRef(OffsetV)); Instruction *ReplacementInst = cast<Instruction>(Replacement); ReplacementInst->removeFromParent(); ReplacementInst->insertAfter(RelocatedBase); @@ -1225,6 +1210,42 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } + const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; + + // Align the pointer arguments to this call if the target thinks it's a good + // idea + unsigned MinSize, PrefAlign; + if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + for (auto &Arg : CI->arg_operands()) { + // We want to align both objects whose address is used directly and + // objects whose address is used in casts and GEPs, though it only makes + // sense for GEPs if the offset is a multiple of the desired alignment and + // if size - offset meets the size threshold. 
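The computeBaseDerivedRelocateMap hunk above is a container-invalidation fix: RelocateIdxMap[BaseKey] inserts a default entry on a miss, and for LLVM's DenseMap an insertion can rehash and invalidate the iterators of the loop walking that same map, while find() only queries. A standalone illustration of the safe form (std::map is used here only for shape; note its operator[] would not even compile against a const map, which rules the bug out by construction):

    #include <map>
    #include <utility>
    #include <vector>

    static void collectPairs(const std::map<int, int> &RelocateIdxMap,
                             std::vector<std::pair<int, int>> &Out) {
      for (const auto &Item : RelocateIdxMap) {
        int BaseKey = Item.second;  // derived lookup key, as with (Key.first, Key.first)
        // find() never inserts, so iterating RelocateIdxMap stays safe.
        auto MaybeBase = RelocateIdxMap.find(BaseKey);
        if (MaybeBase == RelocateIdxMap.end())
          continue;
        Out.push_back({MaybeBase->second, Item.second});
      }
    }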
+ if (!Arg->getType()->isPointerTy()) + continue; + APInt Offset(TD->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + uint64_t Offset2 = Offset.getLimitedValue(); + AllocaInst *AI; + if ((Offset2 & (PrefAlign-1)) == 0 && + (AI = dyn_cast<AllocaInst>(Val)) && + AI->getAlignment() < PrefAlign && + TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + AI->setAlignment(PrefAlign); + // TODO: Also align GlobalVariables + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + unsigned Align = getKnownAlignment(MI->getDest(), *TD); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + if (Align > MI->getAlignment()) + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + } + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { @@ -1241,8 +1262,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { WeakVH IterHandle(CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, - TLI ? TLI->getDataLayout() : nullptr, - TLInfo, ModifiedDT ? nullptr : DT); + TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -1284,15 +1304,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // From here on out we're working with named functions. if (!CI->getCalledFunction()) return false; - // We'll need DataLayout from here on out. - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - if (!TD) return false; - // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // to fortified library functions (e.g. __memcpy_chk) that have the default // "don't know" as the objectsize. Anything else should be left alone. - FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + FortifiedLibCallSimplifier Simplifier(TLInfo, true); if (Value *V = Simplifier.optimizeCall(CI)) { CI->replaceAllUsesWith(V); CI->eraseFromParent(); @@ -2025,7 +2041,7 @@ private: ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion, + bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; @@ -2159,7 +2175,7 @@ class TypePromotionHelper { /// \brief Utility function to promote the operand of \p Ext when this /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. @@ -2167,53 +2183,55 @@ class TypePromotionHelper { /// \return The promoted value which is used instead of Ext. 
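The new alignment code above raises an alloca to the call's preferred alignment only under the two conditions its comment spells out: the accumulated constant offset must itself be a multiple of the preferred alignment, and the object must remain large enough beyond that offset. The predicate in isolation (assumes PrefAlign is a power of two, as alignments are):

    #include <cstdint>

    static uint64_t maybeRaiseAlign(uint64_t CurAlign, uint64_t PrefAlign,
                                    uint64_t Offset, uint64_t AllocSize,
                                    uint64_t MinSize) {
      if ((Offset & (PrefAlign - 1)) == 0 && // offset stays aligned afterwards
          CurAlign < PrefAlign &&            // never lower an alignment
          AllocSize >= MinSize + Offset)     // size - offset meets the threshold
        return PrefAlign;
      return CurAlign;
    }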
static Value *promoteOperandForTruncAndAnyExt( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. - /// \p CreatedInsts[out] contains how many non-free instructions have been + /// \p CreatedInstsCost[out] contains the cost of all the instructions /// created to promote the operand of Ext. /// Newly added extensions are inserted in \p Exts. /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. /// \return The promoted value which is used instead of Ext. - static Value * - promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); + static Value *promoteOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI, bool IsSExt); /// \see promoteOperandForOther. - static Value * - signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, true); + static Value *signExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, true); } /// \see promoteOperandForOther. - static Value * - zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts, - SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { - return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, - Truncs, false); + static Value *zeroExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, false); } public: /// Type for the utility function that promotes the operand of Ext. typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs); + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); /// \brief Given a sign/zero extend instruction \p Ext, return the approriate /// action to promote the operand of \p Ext instead of using Ext. 
/// \return NULL if no promotable action is possible with the current @@ -2330,16 +2348,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); Value *ExtVal = SExt; + bool HasMergedNonFreeExt = false; if (isa<ZExtInst>(SExtOpnd)) { // Replace s|zext(zext(opnd)) // => zext(opnd). + HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); Value *ZExt = TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); TPT.replaceAllUsesWith(SExt, ZExt); @@ -2350,7 +2370,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // => z|sext(opnd). TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); } - CreatedInsts = 0; + CreatedInstsCost = 0; // Remove dead code. if (SExtOpnd->use_empty()) @@ -2359,8 +2379,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( // Check if the extension is still needed. Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { - if (ExtInst && Exts) - Exts->push_back(ExtInst); + if (ExtInst) { + if (Exts) + Exts->push_back(ExtInst); + CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; + } return ExtVal; } @@ -2373,13 +2396,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( Value *TypePromotionHelper::promoteOperandForOther( Instruction *Ext, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, SmallVectorImpl<Instruction *> *Exts, - SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, + bool IsSExt) { // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); - CreatedInsts = 0; + CreatedInstsCost = 0; if (!ExtOpnd->hasOneUse()) { // ExtOpnd will be promoted. // All its uses, but Ext, will need to use a truncated value of the @@ -2454,7 +2478,6 @@ Value *TypePromotionHelper::promoteOperandForOther( continue; } ExtForOpnd = cast<Instruction>(ValForExtOpnd); - ++CreatedInsts; } if (Exts) Exts->push_back(ExtForOpnd); @@ -2463,6 +2486,7 @@ Value *TypePromotionHelper::promoteOperandForOther( // Move the sign extension before the insertion point. TPT.moveBefore(ExtForOpnd, ExtOpnd); TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); + CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); // If more sext are required, new instructions will have to be created. ExtForOpnd = nullptr; } @@ -2475,22 +2499,22 @@ Value *TypePromotionHelper::promoteOperandForOther( /// IsPromotionProfitable - Check whether or not promoting an instruction /// to a wider type was profitable. -/// \p MatchedSize gives the number of instructions that have been matched -/// in the addressing mode after the promotion was applied. 
-/// \p SizeWithPromotion gives the number of created instructions for -/// the promotion plus the number of instructions that have been -/// matched in the addressing mode before the promotion. +/// \p NewCost gives the cost of extension instructions created by the +/// promotion. +/// \p OldCost gives the cost of extension instructions before the promotion +/// plus the number of instructions that have been +/// matched in the addressing mode due to the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool -AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, - unsigned SizeWithPromotion, - Value *PromotedOperand) const { - // We folded less instructions than what we created to promote the operand. +bool AddressingModeMatcher::IsPromotionProfitable( + unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { + DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + // The cost of the new extensions is greater than the cost of the + // old extension plus what we folded. // This is not profitable. - if (MatchedSize < SizeWithPromotion) + if (NewCost > OldCost) return false; - if (MatchedSize > SizeWithPromotion) + if (NewCost < OldCost) return true; // The promotion is neutral but it may help folding the sign extension in // loads for instance. @@ -2688,9 +2712,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - unsigned CreatedInsts = 0; + unsigned CreatedInstsCost = 0; + unsigned ExtCost = !TLI.isExtFree(Ext); Value *PromotedOperand = - TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); + TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. @@ -2712,7 +2737,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned OldSize = AddrModeInsts.size(); if (!MatchAddr(PromotedOperand, Depth) || - !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts, + // The total of the new cost equals the cost of the created + // instructions. + // The total of the old cost equals the cost of the extension plus + // what we have saved in the addressing mode. + !IsPromotionProfitable(CreatedInstsCost, + ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); @@ -3472,7 +3502,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, - unsigned CreatedInsts = 0) { + unsigned CreatedInstsCost = 0) { // Iterate over all the extensions to see if one forms an ext(load). for (auto I : Exts) { // Check if we directly have ext(load). @@ -3494,10 +3524,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); SmallVector<Instruction *, 4> NewExts; - unsigned NewCreatedInstsCost = 0; + unsigned ExtCost = !TLI->isExtFree(I); // Promote.
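Rewritten in terms of costs, the profitability rule above has three outcomes: more expensive rejects, cheaper accepts, and a tie is accepted because the promotion may still let an extension fold into a load. Standalone form of the comparison:

    #include <cstdio>

    static bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                                      bool TieMayHelpFolding) {
      std::printf("OldCost: %u\tNewCost: %u\n", OldCost, NewCost);
      if (NewCost > OldCost)
        return false; // the new extensions cost more than what was folded
      if (NewCost < OldCost)
        return true;
      return TieMayHelpFolding; // neutral: e.g. the operand now feeds a load
    }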
- Value *PromotedVal = - TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr); + Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, + &NewExts, nullptr, *TLI); assert(PromotedVal && "TypePromotionHelper should have filtered out those cases"); @@ -3507,9 +3538,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, // With exactly 2, the transformation is neutral, because we will merge // one extension but leave one. However, we optimistically keep going, // because the new extension may be removed too. - unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts; + long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; + TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && - (TotalCreatedInsts > 1 || + (TotalCreatedInstsCost > 1 || !isPromotedInstructionLegal(*TLI, PromotedVal))) { // The promotion is not profitable, rollback to the previous state. TPT.rollback(LastKnownGood); @@ -3517,8 +3549,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts); - if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 || + (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions. We must make sure one of them is merged with // the load, otherwise we may degrade the code quality. @@ -4193,8 +4225,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr, - TLInfo, DT)) { + const DataLayout &DL = I->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4463,8 +4495,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. /// bool CodeGenPrepare::splitBranchCondition(Function &F) { - if (!TM || TM->Options.EnableFastISel != true || - !TLI || TLI->isJumpExpensive()) + if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) return false; bool MadeChange = false; @@ -4625,10 +4656,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } } - // Request DOM Tree update. // Note: No point in getting fancy here, since the DT info is never - // available to CodeGenPrepare and the existing update code is broken - // anyways. + // available to CodeGenPrepare. 
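Both ExtLdPromotion and MatchOperationAddr above use the same speculate-and-roll-back discipline: record a restoration point in the transaction, apply the promotion, and rewind when the cost check fails. A toy version of that pattern, with a plain action log standing in for TypePromotionTransaction:

    #include <vector>

    static bool speculate(std::vector<int> &ActionLog, bool Profitable) {
      auto RestorePoint = ActionLog.size(); // getRestorationPoint()
      ActionLog.push_back(42);              // speculative transformation
      if (!Profitable) {
        ActionLog.resize(RestorePoint);     // rollback(LastKnownGood)
        return false;
      }
      return true;
    }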
ModifiedDT = true; MadeChange = true; diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 7b47a48..42656fb 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -13,13 +13,19 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "dwarfehprepare" @@ -33,18 +39,28 @@ namespace { // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; + DominatorTree *DT; + const TargetLowering *TLI; + bool InsertUnwindResumeCalls(Function &Fn); Value *GetExceptionObject(ResumeInst *RI); + size_t + pruneUnreachableResumes(Function &Fn, + SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads); public: static char ID; // Pass identification, replacement for typeid. // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in // practice. - DwarfEHPrepare() : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr) {} + DwarfEHPrepare() + : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {} + : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -53,6 +69,8 @@ namespace { return false; } + void getAnalysisUsage(AnalysisUsage &AU) const override; + const char *getPassName() const override { return "Exception handling preparation"; } @@ -60,13 +78,22 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; -INITIALIZE_TM_PASS(DwarfEHPrepare, "dwarfehprepare", "Prepare DWARF exceptions", - false, false) +INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { return new DwarfEHPrepare(TM); } +void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); +} + /// GetExceptionObject - Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead /// instructions, including the 'resume' instruction. @@ -107,21 +134,81 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { return ExnObj; } +/// Replace resumes that are not reachable from a cleanup landing pad with +/// unreachable and then simplify those blocks. 
+size_t DwarfEHPrepare::pruneUnreachableResumes( + Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads) { + BitVector ResumeReachable(Resumes.size()); + size_t ResumeIndex = 0; + for (auto *RI : Resumes) { + for (auto *LP : CleanupLPads) { + if (isPotentiallyReachable(LP, RI, DT)) { + ResumeReachable.set(ResumeIndex); + break; + } + } + ++ResumeIndex; + } + + // If everything is reachable, there is no change. + if (ResumeReachable.all()) + return Resumes.size(); + + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); + LLVMContext &Ctx = Fn.getContext(); + + // Otherwise, insert unreachable instructions and call simplifycfg. + size_t ResumesLeft = 0; + for (size_t I = 0, E = Resumes.size(); I < E; ++I) { + ResumeInst *RI = Resumes[I]; + if (ResumeReachable[I]) { + Resumes[ResumesLeft++] = RI; + } else { + BasicBlock *BB = RI->getParent(); + new UnreachableInst(Ctx, RI); + RI->eraseFromParent(); + SimplifyCFG(BB, TTI, 1); + } + } + Resumes.resize(ResumesLeft); + return ResumesLeft; +} + /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { SmallVector<ResumeInst*, 16> Resumes; + SmallVector<LandingPadInst*, 16> CleanupLPads; + bool FoundLP = false; for (BasicBlock &BB : Fn) { if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) Resumes.push_back(RI); + if (auto *LP = BB.getLandingPadInst()) { + if (LP->isCleanup()) + CleanupLPads.push_back(LP); + // Check the personality on the first landingpad. Don't do anything if + // it's for MSVC. + if (!FoundLP) { + FoundLP = true; + EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn()); + if (isMSVCEHPersonality(Pers)) + return false; + } + } } if (Resumes.empty()) return false; - // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); LLVMContext &Ctx = Fn.getContext(); + + size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + if (ResumesLeft == 0) + return true; // We pruned them all. + + // Find the rewind function if we didn't already. if (!RewindFunction) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); @@ -130,9 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } // Create the basic block where the _Unwind_Resume call will live. - unsigned ResumesSize = Resumes.size(); - - if (ResumesSize == 1) { + if (ResumesLeft == 1) { // Instead of creating a new BB and PHI node, just append the call to // _Unwind_Resume to the end of the single resume block. 
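pruneUnreachableResumes above computes a reachability mask and then compacts the surviving resumes to the front of the vector in place, preserving their order and avoiding any reallocation. The compaction step in isolation:

    #include <cstddef>
    #include <vector>

    static std::size_t compactKept(std::vector<int> &Items,
                                   const std::vector<bool> &Keep) {
      std::size_t Left = 0; // next slot for a surviving element
      for (std::size_t I = 0, E = Items.size(); I < E; ++I)
        if (Keep[I])
          Items[Left++] = Items[I]; // slide survivors down, order preserved
      Items.resize(Left);
      return Left;
    }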
ResumeInst *RI = Resumes.front(); @@ -149,7 +234,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node @@ -175,6 +260,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool DwarfEHPrepare::runOnFunction(Function &Fn) { assert(TM && "DWARF EH preparation requires a target machine"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); + DT = nullptr; + TLI = nullptr; return Changed; } diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index b3a22c8..5b09cf1 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -113,7 +113,7 @@ struct DomainValue { } namespace { -/// LiveReg - Information about a live register. +/// Information about a live register. struct LiveReg { /// Value currently in this register, or NULL when no value is being tracked. /// This counts as a DomainValue reference. @@ -125,7 +125,7 @@ struct LiveReg { /// will be a negative number. int Def; }; -} // anonynous namespace +} // anonymous namespace namespace { class ExeDepsFix : public MachineFunctionPass { @@ -174,7 +174,7 @@ public: private: iterator_range<SmallVectorImpl<int>::const_iterator> - regIndizes(unsigned Reg) const; + regIndices(unsigned Reg) const; // DomainValue allocation. DomainValue *alloc(int domain = -1); @@ -205,10 +205,10 @@ private: char ExeDepsFix::ID = 0; -/// Translate TRI register number to a list of indizes into our stmaller tables +/// Translate TRI register number to a list of indices into our smaller tables /// of interesting registers. iterator_range<SmallVectorImpl<int>::const_iterator> -ExeDepsFix::regIndizes(unsigned Reg) const { +ExeDepsFix::regIndices(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); const auto &Entry = AliasMap[Reg]; return make_range(Entry.begin(), Entry.end()); @@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) { return dv; } -/// release - Release a reference to DV. When the last reference is released, +/// Release a reference to DV. When the last reference is released, /// collapse if needed. void ExeDepsFix::release(DomainValue *DV) { while (DV) { @@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) { } } -/// resolve - Follow the chain of dead DomainValues until a live DomainValue is -/// reached. Update the referenced pointer when necessary. +/// Follow the chain of dead DomainValues until a live DomainValue is reached. +/// Update the referenced pointer when necessary. DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { DomainValue *DV = DVRef; if (!DV || !DV->Next) @@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { setLiveReg(rx, alloc(domain)); } -/// Merge - All instructions and registers in B are moved to A, and B is -/// released. +/// All instructions and registers in B are moved to A, and B is released. 
bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); @@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { return true; } -// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +/// Set up LiveRegs by merging predecessor live-out values. void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Detect back-edges from predecessors we haven't processed yet. SeenUnknownBackEdge = false; @@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { if (MBB->pred_empty()) { for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndizes(*i)) { + for (int rx : regIndices(*i)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndizes(reg)) { + for (int rx : regIndices(reg)) { unsigned Clearance = CurInstr - LiveRegs[rx].Def; DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); @@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { break; if (MO.isUse()) continue; - for (int rx : regIndizes(MO.getReg())) { + for (int rx : regIndices(MO.getReg())) { // This instruction explicitly defines rx. DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); @@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { force(rx, domain); } } @@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { kill(rx); force(rx, domain); } @@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { DomainValue *dv = LiveRegs[rx].Value; if (dv == nullptr) continue; @@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; - for (int rx : regIndizes(mo.getReg())) { + for (int rx : regIndices(mo.getReg())) { if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp deleted file mode 100644 index 63c3699..0000000 --- a/lib/CodeGen/ForwardControlFlowIntegrity.cpp +++ /dev/null @@ -1,374 +0,0 @@ -//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief A pass that instruments code with fast checks for indirect calls and -/// hooks for a function to check violations. -/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "cfi" - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -STATISTIC(NumCFIIndirectCalls, - "Number of indirect call sites rewritten by the CFI pass"); - -char ForwardControlFlowIntegrity::ID = 0; -INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_DEPENDENCY(JumpInstrTables); -INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi", - "Control-Flow Integrity", true, true) - -ModulePass *llvm::createForwardControlFlowIntegrityPass() { - return new ForwardControlFlowIntegrity(); -} - -ModulePass *llvm::createForwardControlFlowIntegrityPass( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - StringRef CFIFuncName) { - return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing, - CFIFuncName); -} - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -static bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. 
- const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning"; - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity() - : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single), - CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::ForwardControlFlowIntegrity( - JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, - std::string CFIFuncName) - : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType), - CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) { - initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); -} - -ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {} - -void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<JumpInstrTableInfo>(); - AU.addRequired<JumpInstrTables>(); -} - -void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) { - // To get the indirect calls, we iterate over all functions and iterate over - // the list of basic blocks in each. We extract a total list of indirect calls - // before modifying any of them, since our modifications will modify the list - // of basic blocks. - for (Function &F : M) { - for (BasicBlock &BB : F) { - for (Instruction &I : BB) { - CallSite CS(&I); - if (!(CS && isIndirectCall(CS))) - continue; - - Value *CalledValue = CS.getCalledValue(); - - // Don't rewrite this instruction if the indirect call is actually just - // inline assembly, since our transformation will generate an invalid - // module in that case. - if (isa<InlineAsm>(CalledValue)) - continue; - - IndirectCalls.push_back(&I); - } - } - } -} - -void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M, - CFITables &CFIT) { - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - for (Instruction *I : IndirectCalls) { - CallSite CS(I); - Value *CalledValue = CS.getCalledValue(); - - // Get the function type for this call and look it up in the tables. - Type *VTy = CalledValue->getType(); - PointerType *PTy = dyn_cast<PointerType>(VTy); - Type *EltTy = PTy->getElementType(); - FunctionType *FunTy = dyn_cast<FunctionType>(EltTy); - FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy); - ++NumCFIIndirectCalls; - Constant *JumpTableStart = nullptr; - Constant *JumpTableMask = nullptr; - Constant *JumpTableSize = nullptr; - - // Some call sites have function types that don't correspond to any - // address-taken function in the module. This happens when function pointers - // are passed in from external code. - auto it = CFIT.find(TransformedTy); - if (it == CFIT.end()) { - // In this case, make sure that the function pointer will change by - // setting the mask and the start to be 0 so that the transformed - // function is 0. 
- JumpTableStart = ConstantInt::get(Int64Ty, 0); - JumpTableMask = ConstantInt::get(Int64Ty, 0); - JumpTableSize = ConstantInt::get(Int64Ty, 0); - } else { - JumpTableStart = it->second.StartValue; - JumpTableMask = it->second.MaskValue; - JumpTableSize = it->second.Size; - } - - rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask, - JumpTableSize); - } - - return; -} - -bool ForwardControlFlowIntegrity::runOnModule(Module &M) { - JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>(); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - // JumpInstrTableInfo stores information about the alignment of each entry. - // The alignment returned by JumpInstrTableInfo is alignment in bytes, not - // in the exponent. - ByteAlignment = JITI->entryByteAlignment(); - LogByteAlignment = llvm::Log2_64(ByteAlignment); - - // Set up tables for control-flow integrity based on information about the - // jump-instruction tables. - CFITables CFIT; - for (const auto &KV : JITI->getTables()) { - uint64_t Size = static_cast<uint64_t>(KV.second.size()); - uint64_t TableSize = NextPowerOf2(Size); - - int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment; - Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue); - Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size); - - // The base of the table is defined to be the first jumptable function in - // the table. - Function *First = KV.second.begin()->second; - Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy); - CFIT[KV.first].StartValue = JumpTableStartValue; - CFIT[KV.first].MaskValue = JumpTableMaskValue; - CFIT[KV.first].Size = JumpTableSize; - } - - if (CFIT.empty()) - return false; - - getIndirectCalls(M); - - if (!CFIEnforcing) { - addWarningFunction(M); - } - - // Update the instructions with the check and the indirect jump through our - // table. - updateIndirectCalls(M, CFIT); - - return true; -} - -void ForwardControlFlowIntegrity::addWarningFunction(Module &M) { - PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext()); - - // Get the type of the Warning Function: void (i8*, i8*), - // where the first argument is the name of the function in which the violation - // occurs, and the second is the function pointer that violates CFI. - SmallVector<Type *, 2> WarningFunArgs; - WarningFunArgs.push_back(CharPtrTy); - WarningFunArgs.push_back(CharPtrTy); - FunctionType *WarningFunTy = - FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false); - - if (!CFIFuncName.empty()) { - Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy); - if (!FailureFun) - report_fatal_error("Could not get or insert the function specified by" - " -cfi-func-name"); - } else { - // The default warning function swallows the warning and lets the call - // continue, since there's no generic way for it to print out this - // information. 
- Function *WarningFun = M.getFunction(cfi_failure_func_name); - if (!WarningFun) { - WarningFun = - Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage, - cfi_failure_func_name, &M); - } - - BasicBlock *Entry = - BasicBlock::Create(M.getContext(), "entry", WarningFun, 0); - ReturnInst::Create(M.getContext(), Entry); - } -} - -void ForwardControlFlowIntegrity::rewriteFunctionPointer( - Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart, - Constant *JumpTableMask, Constant *JumpTableSize) { - IRBuilder<> TempBuilder(I); - - Type *OrigFunType = FunPtr->getType(); - - BasicBlock *CurBB = cast<BasicBlock>(I->getParent()); - Function *CurF = cast<Function>(CurBB->getParent()); - Type *Int64Ty = Type::getInt64Ty(M.getContext()); - - Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty); - Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty); - - Value *NewFunPtr = nullptr; - Value *Check = nullptr; - switch (CFIType) { - case CFIntegrity::Sub: { - // This is the subtract, mask, and add version. - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Mask the difference to force this to be a table offset. - Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask); - - // Add it back to the base. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - case CFIntegrity::Ror: { - // This is the subtract and rotate version. - // Rotate right by the alignment value. The optimizer should recognize - // this sequence as a rotation. - - // This cast is safe, since unsigned is always a subset of uint64_t. - uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment); - Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64); - Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64); - - // Subtract from the base. - Value *Sub = TempBuilder.CreateSub(TI, TStartInt); - - // Create the equivalent of a rotate-right instruction. - Value *Shr = TempBuilder.CreateLShr(Sub, RightShift); - Value *Shl = TempBuilder.CreateShl(Sub, LeftShift); - Value *Or = TempBuilder.CreateOr(Shr, Shl); - - // Perform unsigned comparison to check for inclusion in the table. - Check = TempBuilder.CreateICmpULT(Or, JumpTableSize); - NewFunPtr = FunPtr; - break; - } - case CFIntegrity::Add: { - // This is the mask and add version. - // Mask the function pointer to turn it into an offset into the table. - Value *And = TempBuilder.CreateAnd(TI, JumpTableMask); - - // Then or this offset to the base and get the pointer value. - Value *Result = TempBuilder.CreateAdd(And, TStartInt); - - // Convert it back into a function pointer that we can call. - NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); - break; - } - } - - if (!CFIEnforcing) { - // If a check hasn't been added (in the rotation version), then check to see - // if it's the same as the original function. This check determines whether - // or not we call the CFI failure function. - if (!Check) - Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr); - BasicBlock *InvalidPtrBlock = - BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0); - BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I); - - // Remove the unconditional branch that connects the two blocks. - TerminatorInst *TermInst = CurBB->getTerminator(); - TermInst->eraseFromParent(); - - // Add a conditional branch that depends on the Check above. 
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB); - - // Call the warning function for this pointer, then continue. - Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock); - insertWarning(M, InvalidPtrBlock, BI, FunPtr); - } else { - // Modify the instruction to call this value. - CallSite CS(I); - CS.setCalledFunction(NewFunPtr); - } -} - -void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block, - Instruction *I, Value *FunPtr) { - Function *ParentFun = cast<Function>(Block->getParent()); - - // Get the function to call right before the instruction. - Function *WarningFun = nullptr; - if (CFIFuncName.empty()) { - WarningFun = M.getFunction(cfi_failure_func_name); - } else { - WarningFun = M.getFunction(CFIFuncName); - } - - assert(WarningFun && "Could not find the CFI failure function"); - - Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - - IRBuilder<> WarningInserter(I); - // Create a mergeable GlobalVariable containing the name of the function. - Value *ParentNameGV = - WarningInserter.CreateGlobalString(ParentFun->getName()); - Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy); - Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy); - WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr); -} diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 7a29569..b8799a5 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -247,7 +247,7 @@ namespace { return true; else if (Incr1 == Incr2) { // Favors subsumption. - if (C1->NeedSubsumption == false && C2->NeedSubsumption == true) + if (!C1->NeedSubsumption && C2->NeedSubsumption) return true; else if (C1->NeedSubsumption == C2->NeedSubsumption) { // Favors diamond over triangle, etc. @@ -726,6 +726,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; + // If it is already predicated but we couldn't analyze its terminator, the + // latter might fallthrough, but we can't determine where to. + // Conservatively avoid if-converting again. + if (BBI.Predicate.size() && !BBI.IsBrAnalyzable) + return false; + // If it is already predicated, check if the new predicate subsumes // its predicate. 
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) @@ -1555,7 +1561,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, UpdatePredRedefs(I, Redefs); } - std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); + BBI.Predicate.append(Cond.begin(), Cond.end()); BBI.IsAnalyzed = false; BBI.NonPredSize = 0; @@ -1620,9 +1626,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, } } - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); - std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); + ToBBI.Predicate.append(Cond.begin(), Cond.end()); ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; @@ -1661,8 +1666,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); - std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), - std::back_inserter(ToBBI.Predicate)); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 187e015..fd5749b 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -21,7 +21,8 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" // Static member used for null interference cursors. -InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; +const InterferenceCache::BlockInterference + InterferenceCache::Cursor::NoInterference; // Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a // buffer of size NumPhysRegs to speed up alloc/clear for targets with large diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 1791afb..6519a80 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -170,8 +170,8 @@ public: /// Cursor - The primary query interface for the block interference cache. class Cursor { Entry *CacheEntry; - BlockInterference *Current; - static BlockInterference NoInterference; + const BlockInterference *Current; + static const BlockInterference NoInterference; void setEntry(Entry *E) { Current = nullptr; diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp deleted file mode 100644 index 75fa261..0000000 --- a/lib/CodeGen/JumpInstrTables.cpp +++ /dev/null @@ -1,296 +0,0 @@ -//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief An implementation of jump-instruction tables. 
-/// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jt" - -#include "llvm/CodeGen/JumpInstrTables.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <vector> - -using namespace llvm; - -char JumpInstrTables::ID = 0; - -INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) -INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); -INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", - "Jump-Instruction Tables", true, true) - -STATISTIC(NumJumpTables, "Number of indirect call tables generated"); -STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); - -ModulePass *llvm::createJumpInstrTablesPass() { - // The default implementation uses a single table for all functions. - return new JumpInstrTables(JumpTable::Single); -} - -ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { - return new JumpInstrTables(JTT); -} - -namespace { -static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; -static const char jump_section_prefix[] = ".jump.instr.table.text."; - -// Checks to see if a given CallSite is making an indirect call, including -// cases where the indirect call is made through a bitcast. -bool isIndirectCall(CallSite &CS) { - if (CS.getCalledFunction()) - return false; - - // Check the value to see if it is merely a bitcast of a function. In - // this case, it will translate to a direct function call in the resulting - // assembly, so we won't treat it as an indirect call here. - const Value *V = CS.getCalledValue(); - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - return !(CE->isCast() && isa<Function>(CE->getOperand(0))); - } - - // Otherwise, since we know it's a call, it must be an indirect call - return true; -} - -// Replaces Functions and GlobalAliases with a different Value. -bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { - User *Us = U->getUser(); - if (!Us) - return false; - if (Instruction *I = dyn_cast<Instruction>(Us)) { - CallSite CS(I); - - // Don't do the replacement if this use is a direct call to this function. - // If the use is not the called value, then replace it. - if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { - return false; - } - - U->set(V); - } else if (Constant *C = dyn_cast<Constant>(Us)) { - // Don't replace calls to bitcasts of function symbols, since they get - // translated to direct calls. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { - if (CE->getOpcode() == Instruction::BitCast) { - // This bitcast must have exactly one user. - if (CE->user_begin() != CE->user_end()) { - User *ParentUs = *CE->user_begin(); - if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) { - CallSite CS(CI); - Use &CEU = *CE->use_begin(); - if (CS.isCallee(&CEU)) { - return false; - } - } - } - } - } - - // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier - // requires alias to point to a defined function. 
- // So, GlobalAlias is handled
- // as a separate case in runOnModule.
- if (!isa<GlobalAlias>(C))
- C->replaceUsesOfWithOnConstant(GV, V, U);
- } else {
- llvm_unreachable("The Use of a Function symbol is neither an instruction "
- "nor a constant");
- }
-
- return true;
-}
-
-// Replaces all replaceable address-taken uses of GV with a pointer to a
-// jump-instruction table entry.
-void replaceValueWithFunction(GlobalValue *GV, Function *F) {
- // Go through all uses of this function and replace the uses of GV with the
- // jump-table version of the function. Get the uses as a vector before
- // replacing them, since replacing them changes the use list and invalidates
- // the iterator otherwise.
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
- Use &U = *I++;
-
- // Replacement of constants replaces all instances in the constant. So, some
- // uses might have already been handled by the time we reach them here.
- if (U.get() == GV)
- replaceGlobalValueIndirectUse(GV, F, &U);
- }
-
- return;
-}
-} // end anonymous namespace
-
-JumpInstrTables::JumpInstrTables()
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
- JTType(JumpTable::Single) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::~JumpInstrTables() {}
-
-void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
-}
-
-Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
- FunctionType *OrigFunTy = Target->getFunctionType();
- FunctionType *FunTy = transformType(JTType, OrigFunTy);
-
- JumpMap::iterator it = Metadata.find(FunTy);
- if (Metadata.end() == it) {
- struct TableMeta Meta;
- Meta.TableNum = TableCount;
- Meta.Count = 0;
- Metadata[FunTy] = Meta;
- it = Metadata.find(FunTy);
- ++NumJumpTables;
- ++TableCount;
- }
-
- it->second.Count++;
-
- std::string NewName(jump_func_prefix);
- NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
- Function *JumpFun =
- Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
- // The section for this table
- JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
- JITI->insertEntry(FunTy, Target, JumpFun);
-
- ++NumFuncsInJumpTables;
- return JumpFun;
-}
-
-bool JumpInstrTables::hasTable(FunctionType *FunTy) {
- FunctionType *TransTy = transformType(JTType, FunTy);
- return Metadata.end() != Metadata.find(TransTy);
-}
-
-FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT,
- FunctionType *FunTy) {
- // Returning nullptr forces all types into the same table, since all types map
- // to the same type
- Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
-
- // Ignore the return type. 
- Type *RetTy = VoidPtrTy; - bool IsVarArg = FunTy->isVarArg(); - std::vector<Type *> ParamTys(FunTy->getNumParams()); - FunctionType::param_iterator PI, PE; - int i = 0; - - std::vector<Type *> EmptyParams; - Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); - FunctionType *VoidFnTy = FunctionType::get( - Type::getVoidTy(FunTy->getContext()), EmptyParams, false); - switch (JTT) { - case JumpTable::Single: - - return FunctionType::get(RetTy, EmptyParams, false); - case JumpTable::Arity: - // Transform all types to void* so that all functions with the same arity - // end up in the same table. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - PI++, i++) { - ParamTys[i] = VoidPtrTy; - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Simplified: - // Project all parameters types to one of 3 types: composite, integer, and - // function, matching the three subclasses of Type. - for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; - ++PI, ++i) { - assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) || - isa<CompositeType>(*PI)) && - "This type is not an Integer or a Composite or a Function"); - if (isa<CompositeType>(*PI)) { - ParamTys[i] = VoidPtrTy; - } else if (isa<FunctionType>(*PI)) { - ParamTys[i] = VoidFnTy; - } else if (isa<IntegerType>(*PI)) { - ParamTys[i] = Int32Ty; - } - } - - return FunctionType::get(RetTy, ParamTys, IsVarArg); - case JumpTable::Full: - // Don't transform this type at all. - return FunTy; - } - - return nullptr; -} - -bool JumpInstrTables::runOnModule(Module &M) { - JITI = &getAnalysis<JumpInstrTableInfo>(); - - // Get the set of jumptable-annotated functions that have their address taken. - DenseMap<Function *, Function *> Functions; - for (Function &F : M) { - if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) { - assert(F.hasUnnamedAddr() && - "Attribute 'jumptable' requires 'unnamed_addr'"); - Functions[&F] = nullptr; - } - } - - // Create the jump-table functions. - for (auto &KV : Functions) { - Function *F = KV.first; - KV.second = insertEntry(M, F); - } - - // GlobalAlias is a special case, because the target of an alias statement - // must be a defined function. So, instead of replacing a given function in - // the alias, we replace all uses of aliases that target jumptable functions. - // Note that there's no need to create these functions, since only aliases - // that target known jumptable functions are replaced, and there's no way to - // put the jumptable annotation on a global alias. - DenseMap<GlobalAlias *, Function *> Aliases; - for (GlobalAlias &GA : M.aliases()) { - Constant *Aliasee = GA.getAliasee(); - if (Function *F = dyn_cast<Function>(Aliasee)) { - auto it = Functions.find(F); - if (it != Functions.end()) { - Aliases[&GA] = it->second; - } - } - } - - // Replace each address taken function with its jump-instruction table entry. 
- for (auto &KV : Functions) - replaceValueWithFunction(KV.first, KV.second); - - for (auto &KV : Aliases) - replaceValueWithFunction(KV.first, KV.second); - - return !Functions.empty(); -} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 9c23368..0fb0c46 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,12 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" -#include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" @@ -33,12 +30,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -50,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); void LLVMTargetMachine::initAsmInfo() { - MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( - *getSubtargetImpl()->getRegisterInfo(), getTargetTriple()); + MRI = TheTarget.createMCRegInfo(getTargetTriple()); + MII = TheTarget.createMCInstrInfo(); + // FIXME: Having an MCSubtargetInfo on the target machine is a hack due + // to some backends having subtarget feature dependent module level + // code generation. This is similar to the hack in the AsmPrinter for + // module level assembly etc. + STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(), + getTargetFeatureString()); + + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple()); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. @@ -69,12 +70,13 @@ void LLVMTargetMachine::initAsmInfo() { AsmInfo = TmpAsmInfo; } -LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, - StringRef CPU, StringRef FS, - TargetOptions Options, +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + StringRef DataLayoutString, + StringRef Triple, StringRef CPU, + StringRef FS, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS, Options) { + : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); } @@ -115,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. MachineModuleInfo *MMI = new MachineModuleInfo( - *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), - TM->getObjFileLowering()); + *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. 
@@ -145,16 +146,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { - // Passes to handle jumptable function annotations. These can't be handled at - // JIT time, so we don't add them directly to addPassesToGenerateCode. - PM.add(createJumpInstrTableInfoPass( - getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound())); - PM.add(createJumpInstrTablesPass(Options.JTType)); - if (Options.FCFI) - PM.add(createForwardControlFlowIntegrityPass( - Options.JTType, Options.CFIType, Options.CFIEnforcing, - Options.getCFIFuncName())); - // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -174,22 +165,22 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo(); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + const MCInstrInfo &MII = *getMCInstrInfo(); + std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { case CGFT_AssemblyFile: { - MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - MII, MRI, STI); + MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( + MAI.getAssemblerDialect(), MAI, MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); @@ -203,17 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, - *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; - AsmStreamer.reset( - getTarget() - .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE, - STI, Options.MCOptions.MCRelaxAll)); + Triple T(getTargetTriple()); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); break; } case CGFT_Null: @@ -253,18 +243,19 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter( - *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + MCCodeEmitter *MCE = + getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; + Triple T(getTargetTriple()); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( - getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, - Options.MCOptions.MCRelaxAll)); + T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index cdf505e..4321849 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } - -#ifdef NDEBUG -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { - LatencyPriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index dc936a3..e3791be 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -276,7 +277,7 @@ public: /// getDebugLoc - Return DebugLoc of this UserValue. 
DebugLoc getDebugLoc() { return dl;} - void print(raw_ostream&, const TargetMachine*); + void print(raw_ostream &, const TargetRegisterInfo *); }; } // namespace @@ -362,7 +363,7 @@ public: }; } // namespace -void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { +void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { DIVariable DV(Variable); OS << "!\""; DV.printExtendedName(OS); @@ -378,7 +379,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { } for (unsigned i = 0, e = locations.size(); i != e; ++i) { OS << " Loc" << i << '='; - locations[i].print(OS, TM); + locations[i].print(OS, TRI); } OS << '\n'; } @@ -386,7 +387,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { void LDVImpl::print(raw_ostream &OS) { OS << "********** DEBUG VARIABLES **********\n"; for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->print(OS, &MF->getTarget()); + userValues[i]->print(OS, TRI); } void UserValue::coalesceLocation(unsigned LocNo) { @@ -1004,7 +1005,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { return; const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { - DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); + DEBUG(userValues[i]->print(dbgs(), TRI)); userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index fd7516d..2afd7fa 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -32,6 +32,7 @@ #include <algorithm> using namespace llvm; +namespace { //===----------------------------------------------------------------------===// // Implementation of various methods necessary for calculation of live ranges. // The implementation of the methods abstracts from the concrete type of the @@ -293,6 +294,7 @@ private: return I; } }; +} // namespace //===----------------------------------------------------------------------===// // LiveRange methods @@ -567,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, /// Also remove the value# from value# list. void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - iterator I = end(); - iterator E = begin(); - do { - --I; - if (I->valno == ValNo) - segments.erase(I); - } while (I != E); + segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + return S.valno == ValNo; + }), end()); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } @@ -747,7 +745,6 @@ void LiveRange::flushSegmentSet() { segments.empty() && "segment set can be used only initially before switching to the array"); segments.append(segmentSet->begin(), segmentSet->end()); - delete segmentSet; segmentSet = nullptr; verify(); } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index cc08045..adca4cc 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -199,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI); + LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); computeDeadValues(LI, nullptr); } @@ -466,7 +466,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? 
// Otherwise we may have to add a read-undef
// flag for subregister defs.
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
MI->addRegisterDefReadUndef(LI.reg);
@@ -662,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
SRs.clear();
for (const LiveInterval::SubRange &SR : LI.subranges()) {
SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
}
@@ -700,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
goto CancelKill;
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
// Example:
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 7efd941..89567ef 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d804b39..45e7265 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -50,7 +50,7 @@ static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
LR.createDeadDef(DefIdx, Alloc);
}
-void LiveRangeCalc::calculate(LiveInterval &LI) {
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
assert(MRI && Indexes && "call reset() first");
// Step 1: Create minimal live segments for every definition of Reg.
@@ -63,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) {
continue;
unsigned SubReg = MO.getSubReg();
- if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) : MRI->getMaxLaneMaskForVReg(Reg);
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 90bf971..34d9953 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -187,7 +187,7 @@ public:
/// Calculates liveness for the register specified in live interval @p LI.
/// Creates subregister live ranges as needed if subreg liveness tracking is
/// enabled.
- void calculate(LiveInterval &LI);
+ void calculate(LiveInterval &LI, bool TrackSubRegs);
//===--------------------------------------------------------------------===//
// Low-level interface. 
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 8a6ac25..5c9c679 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { assert(Slot >= 0 && "Spill slot indice must be >= 0"); SS2IntervalMap::iterator I = S2IMap.find(Slot); if (I == S2IMap.end()) { - I = S2IMap.insert(I, std::make_pair(Slot, - LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F))); + I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot), + std::forward_as_tuple( + TargetRegisterInfo::index2StackSlot(Slot), 0.0F)) + .first; S2RCMap.insert(std::make_pair(Slot, RC)); } else { // Use the largest common subclass register class. diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index c4bca5f..11deb81 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index e8bf687..8378429 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(int64_t BaseOffset, +lookupCandidateBaseReg(unsigned BaseReg, + int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, @@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset, // Check if the relative offset from the where the base register references // to the target address is in range for the instruction. int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; - return TRI->isFrameOffsetLegal(MI, Offset); + return TRI->isFrameOffsetLegal(MI, BaseReg, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // instruction itself will be taken into account by the target, // so we don't have to adjust for it here when reusing a base // register. - if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, - LocalOffset, MI, TRI)) { + if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, + FrameSizeAdjust, LocalOffset, MI, + TRI)) { DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); // We found a register to reuse. Offset = FrameSizeAdjust + LocalOffset - BaseOffset; @@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // then don't bother creating it. 
if (ref + 1 >= e || !lookupCandidateBaseReg( - BaseOffset, FrameSizeAdjust, + BaseReg, BaseOffset, FrameSizeAdjust, FrameReferenceInsns[ref + 1].getLocalOffset(), FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 3c73905..98359b1 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\t'; if (I->isInsideBundle()) OS << " * "; - I->print(OS, &getParent()->getTarget()); + I->print(OS); } // Print the successors of this block according to the CFG. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 1b5c1f1..ecc50c9 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -40,13 +41,14 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; -#define DEBUG_TYPE "block-placement2" +#define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of uncondittional branches"); @@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. -static cl::opt<unsigned> -ExitBlockBias("block-placement-exit-block-bias", - cl::desc("Block frequency percentage a loop exit block needs " - "over the original exit to be considered the new exit."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> ExitBlockBias( + "block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + +static cl::opt<bool> OutlineOptionalBranches( + "outline-optional-branches", + cl::desc("Put completely optional branches, i.e. branches with a common " + "post dominator, out of line."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> OutlineOptionalThreshold( + "outline-optional-threshold", + cl::desc("Don't outline optional branches that are a single block with an " + "instruction count below this threshold"), + cl::init(4), cl::Hidden); namespace { class BlockChain; @@ -107,7 +121,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -144,19 +158,18 @@ public: // Update the incoming blocks to point to this chain, and add them to the // chain structure. 
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); - BI != BE; ++BI) { - Blocks.push_back(*BI); - assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); - BlockToChain[*BI] = this; + for (MachineBasicBlock *ChainBB : *Chain) { + Blocks.push_back(ChainBB); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + BlockToChain[ChainBB] = this; } } #ifndef NDEBUG /// \brief Dump the blocks in this chain. LLVM_DUMP_METHOD void dump() { - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->dump(); + for (MachineBasicBlock *MBB : *this) + MBB->dump(); } #endif // NDEBUG @@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the target's lowering info. const TargetLoweringBase *TLI; + /// \brief A handle to the post dominator tree. + MachineDominatorTree *MDT; + + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate + /// all terminators of the MachineFunction. + SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; + /// \brief Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily while processing the loop structure of @@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; - void markChainSuccessors(BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); - MachineBasicBlock *selectBestCandidateBlock( - BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, - const BlockFilterSet *BlockFilter); - MachineBasicBlock *getFirstUnplacedBlock( - MachineFunction &F, - const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, @@ -244,6 +262,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -252,12 +271,13 @@ public: char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", "Branch Probability 
Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", static std::string getBlockName(MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber() - << " (derived from LLVM BB '" << BB->getName() << "')"; + OS << "BB#" << BB->getNumber(); + OS << " (derived from LLVM BB '" << BB->getName() << "')"; OS.flush(); return Result; } @@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) { /// having one fewer active predecessor. It also adds any successors of this /// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( - BlockChain &Chain, - MachineBasicBlock *LoopHeaderBB, + BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. - for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); - CBI != CBE; ++CBI) { + for (MachineBasicBlock *MBB : Chain) { // Add any successors for which this is the only un-placed in-loop // predecessor to the worklist as a viable candidate for CFG-neutral // placement. No subsequent placement of this block will violate the CFG // shape, so we get to use heuristics to choose a favorable placement. - for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), - SE = (*CBI)->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Disregard edges within a fixed chain, or edges to the loop header. - if (&Chain == &SuccChain || *SI == LoopHeaderBB) + if (&Chain == &SuccChain || Succ == LoopHeaderBB) continue; // This is a cross-chain edge that is within the loop, so decrement the @@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors( /// very hot successor edges. /// /// \returns The best successor block found, or null if none are viable. -MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( - MachineBasicBlock *BB, BlockChain &Chain, - const BlockFilterSet *BlockFilter) { +MachineBasicBlock * +MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; @@ -363,6 +380,30 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // If we outline optional branches, look whether Succ is unavoidable, i.e. + // dominates all terminators of the MachineFunction. If it does, other + // successors must be optional. Don't do this for cold branches. 
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && + UnavoidableBlocks.count(Succ) > 0) { + auto HasShortOptionalBranch = [&]() { + for (MachineBasicBlock *Pred : Succ->predecessors()) { + // Check whether there is an unplaced optional branch. + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + // Check whether the optional branch has exactly one BB. + if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) + continue; + // Check whether the optional branch is small. + if (Pred->size() < OutlineOptionalThreshold) + return true; + } + return false; + }; + if (!HasShortOptionalBranch()) + return Succ; + } + // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { @@ -426,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), [&](MachineBasicBlock *BB) { - return BlockToChain.lookup(BB) == &Chain; - }), + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; - for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), - WBE = WorkList.end(); - WBI != WBE; ++WBI) { - BlockChain &SuccChain = *BlockToChain[*WBI]; + for (MachineBasicBlock *MBB : WorkList) { + BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*WBI) - << " -> Already merged!\n"); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n"); continue; } assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); - BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; - MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; - BestBlock = *WBI; + BestBlock = MBB; BestFreq = CandidateFreq; } return BestBlock; @@ -481,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( } void MachineBlockPlacement::buildChain( - MachineBasicBlock *BB, - BlockChain &Chain, + MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); @@ -509,8 +546,8 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, - BlockFilter); + BestSucc = + getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter); if (!BestSucc) break; @@ -523,8 +560,8 @@ void MachineBlockPlacement::buildChain( // Zero out LoopPredecessors for the successor we're about to merge in case // we selected a successor that didn't fit naturally into the CFG. 
SuccChain.LoopPredecessors = 0; - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) - << " to " << getBlockNum(BestSucc) << "\n"); + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " + << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); @@ -554,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(*HeaderChain.begin())) return L.getHeader(); - DEBUG(dbgs() << "Finding best loop top for: " - << getBlockName(L.getHeader()) << "\n"); + DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) + << "\n"); BlockFrequency BestPredFreq; MachineBasicBlock *BestPred = nullptr; - for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), - PE = L.getHeader()->pred_end(); - PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; + for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " << Pred->succ_size() << " successors, "; - MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -594,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, return BestPred; } - /// \brief Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopExit(MachineFunction &F, - MachineLoop &L, +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block @@ -624,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; - DEBUG(dbgs() << "Finding best loop exit for: " - << getBlockName(L.getHeader()) << "\n"); - for (MachineLoop::block_iterator I = L.block_begin(), - E = L.block_end(); - I != E; ++I) { - BlockChain &Chain = *BlockToChain[*I]; + DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) + << "\n"); + for (MachineBasicBlock *MBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *std::prev(Chain.end())) + if (MBB != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -646,43 +676,40 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // the MBPI analysis, we use the internal weights and manually compute the // probabilities to avoid quadratic behavior. 
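    // Editorial sketch (not part of the patch): if a block has two exit edges
    // with weights {6, 2}, getSumForBlock returns 8 (with WeightScale 1), and
    // the loop below forms SuccProb values of 6/8 and 2/8 straight from the
    // cached weights; one summation per block rather than one MBPI
    // probability query per edge.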
uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); - for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), - SE = (*I)->succ_end(); - SI != SE; ++SI) { - if ((*SI)->isLandingPad()) + uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); + for (MachineBasicBlock *Succ : MBB->successors()) { + if (Succ->isLandingPad()) continue; - if (*SI == *I) + if (Succ == MBB) continue; - BlockChain &SuccChain = *BlockToChain[*SI]; + BlockChain &SuccChain = *BlockToChain[Succ]; // Don't split chains, either this chain or the successor's chain. if (&Chain == &SuccChain) { - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (chain conflict)\n"); + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (chain conflict)\n"); continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); - if (LoopBlockSet.count(*SI)) { - DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + if (LoopBlockSet.count(Succ)) { + DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (" << SuccWeight << ")\n"); HasLoopingSucc = true; continue; } unsigned SuccLoopDepth = 0; - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) { SuccLoopDepth = ExitLoop->getLoopDepth(); if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); + BlocksExitingToOuterLoop.insert(MBB); } BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); - BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; - DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] ("; - MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); // Note that we bias this toward an existing layout successor to retain // incoming order in the absence of better information. The exit must have // a frequency higher than the current exit before we consider breaking @@ -690,10 +717,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || - ((*I)->isLayoutSuccessor(*SI) && + (MBB->isLayoutSuccessor(Succ) && !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; - ExitingBB = *I; + ExitingBB = MBB; } } @@ -734,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, MachineBasicBlock *Top = *LoopChain.begin(); bool ViableTopFallthrough = false; - for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), - PE = Top->pred_end(); - PI != PE; ++PI) { - BlockChain *PredChain = BlockToChain[*PI]; - if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *std::prev(PredChain->end()))) { + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -750,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // introduce an unnecessary branch. 
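  // Editorial note: when the loop top is reachable by fallthrough from
  // outside the chain, the check below gives up on rotation if the bottom
  // could also fall through out of the loop; rotating would then trade away
  // fallthrough at both ends of the chain for explicit branches.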
if (ViableTopFallthrough) { MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); - for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), - SE = Bottom->succ_end(); - SI != SE; ++SI) { - BlockChain *SuccChain = BlockToChain[*SI]; - if (!LoopBlockSet.count(*SI) && - (!SuccChain || *SI == *SuccChain->begin())) + for (MachineBasicBlock *Succ : Bottom->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) return; } } - BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), - ExitingBB); + BlockChain::iterator ExitIt = + std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); if (ExitIt == LoopChain.end()) return; @@ -778,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. - for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *InnerLoop : L) + buildLoopChains(F, *InnerLoop); SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); @@ -805,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet<BlockChain *, 4> UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineLoop::block_iterator BI = L.block_begin(), - BE = L.block_end(); - BI != BE; ++BI) { - BlockChain &Chain = *BlockToChain[*BI]; + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred)) continue; ++Chain.LoopPredecessors; } @@ -841,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } - for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) { - dbgs() << " ... " << getBlockName(*BCI) << "\n"; - if (!LoopBlockSet.erase(*BCI)) { + for (MachineBasicBlock *ChainBB : LoopChain) { + dbgs() << " ... " << getBlockName(ChainBB) << "\n"; + if (!LoopBlockSet.erase(ChainBB)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough // from a loop block to a non-loop block or vice versa. 
dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*BCI) << "\n"; + << " Bad block: " << getBlockName(ChainBB) << "\n"; } } if (!LoopBlockSet.empty()) { BadLoop = true; - for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), - LBE = LoopBlockSet.end(); - LBI != LBE; ++LBI) + for (MachineBasicBlock *LoopBB : LoopBlockSet) dbgs() << "Loop contains blocks never placed into a chain!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" - << " Bad block: " << getBlockName(*LBI) << "\n"; + << " Bad block: " << getBlockName(LoopBB) << "\n"; } assert(!BadLoop && "Detected problems with the placement of this loop."); }); @@ -875,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = FI; - BlockChain *Chain - = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + BlockChain *Chain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve // the exact fallthrough behavior for. for (;;) { @@ -899,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } } + if (OutlineOptionalBranches) { + // Find the nearest common dominator of all of F's terminators. + MachineBasicBlock *Terminator = nullptr; + for (MachineBasicBlock &MBB : F) { + if (MBB.succ_size() == 0) { + if (Terminator == nullptr) + Terminator = &MBB; + else + Terminator = MDT->findNearestCommonDominator(Terminator, &MBB); + } + } + + // MBBs dominating this common dominator are unavoidable. + UnavoidableBlocks.clear(); + for (MachineBasicBlock &MBB : F) { + if (MDT->dominates(&MBB, Terminator)) { + UnavoidableBlocks.insert(&MBB); + } + } + } + // Build any loop-based chains. - for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; - ++LI) - buildLoopChains(F, **LI); + for (MachineLoop *L : *MLI) + buildLoopChains(F, *L); SmallVector<MachineBasicBlock *, 16> BlockWorkList; SmallPtrSet<BlockChain *, 4> UpdatedPreds; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = &*FI; - BlockChain &Chain = *BlockToChain[BB]; + for (MachineBasicBlock &MBB : F) { + BlockChain &Chain = *BlockToChain[&MBB]; if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); - for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); - BCI != BCE; ++BCI) { - assert(BlockToChain[*BCI] == &Chain); - for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), - PE = (*BCI)->pred_end(); - PI != PE; ++PI) { - if (BlockToChain[*PI] == &Chain) + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain) continue; ++Chain.LoopPredecessors; } @@ -940,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Crash at the end so we get all of the debugging output first. 
bool BadFunc = false;
    FunctionBlockSetType FunctionBlockSet;
-    for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
-      FunctionBlockSet.insert(FI);
+    for (MachineBasicBlock &MBB : F)
+      FunctionBlockSet.insert(&MBB);

-    for (BlockChain::iterator BCI = FunctionChain.begin(),
-                              BCE = FunctionChain.end();
-         BCI != BCE; ++BCI)
-      if (!FunctionBlockSet.erase(*BCI)) {
+    for (MachineBasicBlock *ChainBB : FunctionChain)
+      if (!FunctionBlockSet.erase(ChainBB)) {
        BadFunc = true;
        dbgs() << "Function chain contains a block not in the function!\n"
-               << "  Bad block:    " << getBlockName(*BCI) << "\n";
+               << "  Bad block:    " << getBlockName(ChainBB) << "\n";
      }

    if (!FunctionBlockSet.empty()) {
      BadFunc = true;
-      for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
-                                          FBE = FunctionBlockSet.end();
-           FBI != FBE; ++FBI)
+      for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
        dbgs() << "Function contains blocks never placed into a chain!\n"
-               << "  Bad block:    " << getBlockName(*FBI) << "\n";
+               << "  Bad block:    " << getBlockName(RemainingBB) << "\n";
    }
    assert(!BadFunc && "Detected problems with the block placement.");
  });

  // Splice the blocks into place.
  MachineFunction::iterator InsertPos = F.begin();
-  for (BlockChain::iterator BI = FunctionChain.begin(),
-                            BE = FunctionChain.end();
-       BI != BE; ++BI) {
-    DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
-                                                 : "          ... ")
-          << getBlockName(*BI) << "\n");
-    if (InsertPos != MachineFunction::iterator(*BI))
-      F.splice(InsertPos, *BI);
+  for (MachineBasicBlock *ChainBB : FunctionChain) {
+    DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+                                                       : "          ... ")
+                 << getBlockName(ChainBB) << "\n");
+    if (InsertPos != MachineFunction::iterator(ChainBB))
+      F.splice(InsertPos, ChainBB);
    else
      ++InsertPos;

    // Update the terminator of the previous block.
-    if (BI == FunctionChain.begin())
+    if (ChainBB == *FunctionChain.begin())
      continue;
-    MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
+    MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));

    // FIXME: It would be awesome if updateTerminator would just return rather
    // than assert when the branch cannot be analyzed in order to remove this
@@ -989,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
    if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
      // The "PrevBB" is not yet updated to reflect current code layout, so,
      //   o. it may fall-through to a block without an explicit "goto" instruction
-      //      before layout, and no longer fall-through it after layout; or
+      //      before layout, and no longer fall-through it after layout; or
      //   o. just the opposite.
-      //
+      //
      // AnalyzeBranch() may return an erroneous value for FBB when these two
      // situations take place. For the first scenario FBB is mistakenly set
      // to NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
      // is mistakenly pointing to "*BI".
// bool needUpdateBr = true; - if (!Cond.empty() && (!FBB || FBB == *BI)) { + if (!Cond.empty() && (!FBB || FBB == ChainBB)) { PrevBB->updateTerminator(); needUpdateBr = false; Cond.clear(); @@ -1018,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { << getBlockName(PrevBB) << "\n"); DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); needUpdateBr = true; @@ -1042,29 +1067,30 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) - return; // Empty chain. + return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = std::next(FunctionChain.begin()), - BE = FunctionChain.end(); - BI != BE; ++BI) { + for (MachineBasicBlock *ChainBB : FunctionChain) { + if (ChainBB == *FunctionChain.begin()) + continue; + // Don't align non-looping basic blocks. These are unlikely to execute // enough times to matter in practice. Note that we'll still handle // unnatural CFGs inside of a natural outer loop (the common case) and // rotated loops. - MachineLoop *L = MLI->getLoopFor(*BI); + MachineLoop *L = MLI->getLoopFor(ChainBB); if (!L) continue; unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) - continue; // Don't care about loop alignment. + continue; // Don't care about loop alignment. // If the block is cold relative to the function entry don't waste space // aligning it. - BlockFrequency Freq = MBFI->getBlockFreq(*BI); + BlockFrequency Freq = MBFI->getBlockFreq(ChainBB); if (Freq < WeightedEntryFreq) continue; @@ -1077,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *std::prev(BI); + MachineBasicBlock *LayoutPred = + &*std::prev(MachineFunction::iterator(ChainBB)); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. - if (!LayoutPred->isSuccessor(*BI)) { - (*BI)->setAlignment(Align); + if (!LayoutPred->isSuccessor(ChainBB)) { + ChainBB->setAlignment(Align); continue; } @@ -1090,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. 
- BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BranchProbability LayoutProb = + MBPI->getEdgeProbability(LayoutPred, ChainBB); BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; if (LayoutEdgeFreq <= (Freq * ColdProb)) - (*BI)->setAlignment(Align); + ChainBB->setAlignment(Align); } } @@ -1110,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MLI = &getAnalysis<MachineLoopInfo>(); TII = F.getSubtarget().getInstrInfo(); TLI = F.getSubtarget().getTargetLowering(); + MDT = &getAnalysis<MachineDominatorTree>(); assert(BlockToChain.empty()); buildCFGChains(F); @@ -1119,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - FI->setAlignment(AlignAllBlock); + for (MachineBasicBlock &MBB : F) + MBB.setAlignment(AlignAllBlock); // We always return true as we have no way to track whether the final order // differs from the original order. @@ -1176,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); - for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BlockFrequency BlockFreq = MBFI->getBlockFreq(I); - Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches - : NumUncondBranches; - Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq - : UncondBranchTakenFreq; - for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), - SE = I->succ_end(); - SI != SE; ++SI) { + for (MachineBasicBlock &MBB : F) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + Statistic &NumBranches = + (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches; + Statistic &BranchTakenFreq = + (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq; + for (MachineBasicBlock *Succ : MBB.successors()) { // Skip if this successor is a fallthrough. - if (I->isLayoutSuccessor(*SI)) + if (MBB.isLayoutSuccessor(Succ)) continue; - BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + BlockFrequency EdgeFreq = + BlockFreq * MBPI->getEdgeProbability(&MBB, Succ); ++NumBranches; BranchTakenFreq += EdgeFreq.getFrequency(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 21b9c5a..f72d72a 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index cbd6272..9611122 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, I != E; ++I) { unsigned MappedDef = *I; // Source of copy is no longer available for propagation. 
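  // Editorial sketch of the intent (not stated in the patch): by erasing
  // unconditionally, stale sub-register entries are invalidated even when
  // MappedDef itself never had an entry in AvailCopyMap.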
-    if (AvailCopyMap.erase(MappedDef)) {
-      for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
-        AvailCopyMap.erase(*SR);
-    }
+    AvailCopyMap.erase(MappedDef);
+    for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+      AvailCopyMap.erase(*SR);
  }
 }
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index df60cf3..467a2e4 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"

 using namespace llvm;

@@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() {
 void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
   DT->print(OS);
 }
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+  // Bail out early if there is nothing to do.
+  if (CriticalEdgesToSplit.empty())
+    return;
+
+  // For each element in CriticalEdgesToSplit, remember whether or not the
+  // element is the new immediate dominator of its successor. The mapping is
+  // done by index, i.e., the information for the ith element of
+  // CriticalEdgesToSplit is the ith element of IsNewIDom.
+  SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+  size_t Idx = 0;
+
+  // Collect all the dominance property information before invalidating the
+  // underlying DT.
+  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+    // Update dominator information.
+    MachineBasicBlock *Succ = Edge.ToBB;
+    MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+    for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+      if (PredBB == Edge.NewBB)
+        continue;
+      // If we are in this situation:
+      //   FromBB1        FromBB2
+      //    +              +
+      //   + +            + +
+      //  +   +          +   +
+      // ...  Split1  Split2 ...
+      //          +   +
+      //           + +
+      //            +
+      //           Succ
+      // Instead of checking the dominance property with Split2, we check it
+      // with FromBB2, since Split2 is not yet represented in the underlying
+      // DT structure.
+      if (NewBBs.count(PredBB)) {
+        assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+                                           "critical edge split has more "
+                                           "than one predecessor!");
+        PredBB = *PredBB->pred_begin();
+      }
+      if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+        IsNewIDom[Idx] = false;
+        break;
+      }
+    }
+    ++Idx;
+  }
+
+  // Now, update DT with the collected dominance property information.
+  Idx = 0;
+  for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+    // We know FromBB dominates NewBB.
+    MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+    // If all the other predecessors of "Succ" are dominated by "Succ" itself
+    // then the new block is the new immediate dominator of "Succ". Otherwise,
+    // the new block doesn't dominate anything.
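+    // Illustrative example (editorial): if FromBB -> Succ was Succ's only
+    // incoming edge, the split block ends up as Succ's sole predecessor and
+    // becomes its new immediate dominator; if Succ has another predecessor
+    // that Succ does not dominate, the new block is added as a plain leaf
+    // under FromBB and Succ's idom is left untouched.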
+ if (IsNewIDom[Idx]) + DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + ++Idx; + } + NewBBs.clear(); + CriticalEdgesToSplit.clear(); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 151a260..6ceace8 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()), + : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { if (STI->getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(this); @@ -584,14 +584,6 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, return -++NumFixedObjects; } -int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) { - // Force the use of a frame pointer. The intention is that this intrinsic be - // used in conjunction with unwind mechanisms that leak the frame pointer. - setFrameAddressIsTaken(true); - Size = RoundUpToAlignment(Size, StackAlignment); - return CreateStackObject(Size, StackAlignment, false); -} - BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { assert(MBB && "MBB must be valid"); @@ -903,16 +895,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); else if (A->getType() != IntTy) A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(A), TD); + const_cast<Constant *>(A), *TD); if (isa<PointerType>(B->getType())) B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant*>(B), TD); + const_cast<Constant *>(B), *TD); return A == B; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 981e4a3..1240efb 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -276,17 +276,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) { /// print - Print the specified machine operand. /// -void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { - // If the instruction is embedded into a basic block, we can find the - // target info for the instruction. - if (!TM) - if (const MachineInstr *MI = getParent()) - if (const MachineBasicBlock *MBB = MI->getParent()) - if (const MachineFunction *MF = MBB->getParent()) - TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = - TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; - +void MachineOperand::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { switch (getType()) { case MachineOperand::MO_Register: OS << PrintReg(getReg(), TRI, getSubReg()); @@ -1512,23 +1503,19 @@ void MachineInstr::dump() const { #endif } -static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - DL.print(Ctx, CommentOS); -} - -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, - bool SkipOpers) const { - // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. 
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { + // We can be a bit tidier if we know the MachineFunction. const MachineFunction *MF = nullptr; + const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; + const TargetInstrInfo *TII = nullptr; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); - if (!TM && MF) - TM = &MF->getTarget(); - if (MF) + if (MF) { MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + } } // Save a list of virtual registers. @@ -1541,7 +1528,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, !getOperand(StartOp).isImplicit(); ++StartOp) { if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, TM); + getOperand(StartOp).print(OS, TRI); unsigned Reg = getOperand(StartOp).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); @@ -1551,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " = "; // Print the opcode name. - if (TM && TM->getSubtargetImpl()->getInstrInfo()) - OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode()); + if (TII) + OS << TII->getName(getOpcode()); else OS << "UNKNOWN"; @@ -1568,7 +1555,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. OS << " "; - getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI); // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1606,9 +1593,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MRI->use_empty(Reg)) { bool HasAliasLive = false; - for (MCRegAliasIterator AI( - Reg, TM->getSubtargetImpl()->getRegisterInfo(), true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; if (!MRI->use_empty(AliasReg)) { HasAliasLive = true; @@ -1641,10 +1626,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (DI.isVariable() && !DIV.getName().empty()) OS << "!\"" << DIV.getName() << '\"'; else - MO.print(OS, TM); - } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { - OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName( - MO.getImm()); + MO.print(OS, TRI); + } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TRI->getSubRegIndexName(MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1661,11 +1645,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, unsigned RCID = 0; if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { - if (TM) { - const TargetRegisterInfo *TRI = - TM->getSubtargetImpl()->getRegisterInfo(); - OS << ':' - << TRI->getRegClassName(TRI->getRegClass(RCID)); + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); } else OS << ":RC" << RCID; } @@ -1679,7 +1660,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else - MO.print(OS, TM); + MO.print(OS, TRI); } // Briefly indicate whether any call clobbers were omitted. 
@@ -1715,7 +1696,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
    if (!HaveSemi) OS << ";"; HaveSemi = true;
    for (unsigned i = 0; i != VirtRegs.size(); ++i) {
      const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
-      OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC)
+      OS << " " << TRI->getRegClassName(RC)
         << ':' << PrintReg(VirtRegs[i]);
      for (unsigned j = i+1; j != VirtRegs.size();) {
        if (MRI->getRegClass(VirtRegs[j]) != RC) {
@@ -1738,7 +1719,7 @@
      DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
      if (!InlinedAtDL.isUnknown() && MF) {
        OS << " inlined @[ ";
-        printDebugLoc(InlinedAtDL, MF, OS);
+        InlinedAtDL.print(OS);
        OS << " ]";
      }
    }
@@ -1747,7 +1728,7 @@
  } else if (!debugLoc.isUnknown() && MF) {
    if (!HaveSemi) OS << ";";
    OS << " dbg:";
-    printDebugLoc(debugLoc, MF, OS);
+    debugLoc.print(OS);
  OS << '\n';
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 64d0932..2f65a2e 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts",
                cl::desc("MachineLICM should hoist even cheap instructions"),
                cl::init(false), cl::Hidden);

+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+                       cl::desc("MachineLICM should sink instructions into "
+                                "loops to avoid register spills"),
+                       cl::init(false), cl::Hidden);
+
 STATISTIC(NumHoisted,
           "Number of machine instructions hoisted out of loops");
 STATISTIC(NumLowRP,
@@ -243,6 +249,11 @@ namespace {
    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);

+    /// SinkIntoLoop - Sink instructions into loops if profitable. This
+    /// especially tries to prevent register spills caused by high register
+    /// pressure when there is little or no overhead to moving instructions
+    /// into loops.
+    void SinkIntoLoop();
+
    /// getRegisterClassIDAndCost - For a given MI, register, and the operand
    /// index, return the ID and cost of its representative register class by
    /// reference.
@@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
        FirstInLoop = true;
        HoistOutOfLoop(N);
        CSEMap.clear();
+
+        if (SinkInstsToAvoidSpills)
+          SinkIntoLoop();
      }
    }

@@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  }
}

+void MachineLICM::SinkIntoLoop() {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader)
+    return;
+
+  SmallVector<MachineInstr *, 8> Candidates;
+  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+       I != Preheader->instr_end(); ++I) {
+    // We need to ensure that we can safely move this instruction into the
+    // loop. As such, it must not have side effects (as, for example, a call
+    // does).
+    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
+      Candidates.push_back(I);
+  }
+
+  for (MachineInstr *I : Candidates) {
+    const MachineOperand &MO = I->getOperand(0);
+    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
+      continue;
+    if (!MRI->hasOneDef(MO.getReg()))
+      continue;
+    bool CanSink = true;
+    MachineBasicBlock *B = nullptr;
+    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+      // FIXME: Come up with a proper cost model that estimates whether
+      // sinking the instruction (and thus possibly executing it on every
+      // loop iteration) is more expensive than keeping its value in a
+      // register across the loop.
+      // For now, assume that copies are cheap and thus almost always worth it.
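+      // Illustrative example (editorial): a constant materialized in the
+      // preheader whose only in-loop uses are COPYs can be sunk to the
+      // nearest common dominator of those uses, so its value stops tying up
+      // a register across the entire loop.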
+ if (!MI.isCopy()) { + CanSink = false; + break; + } + if (!B) { + B = MI.getParent(); + continue; + } + B = DT->findNearestCommonDominator(B, MI.getParent()); + if (!B) { + CanSink = false; + break; + } + } + if (!CanSink || !B || B == Preheader) + continue; + B->splice(B->getFirstNonPHI(), Preheader, I); + } +} + static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 89054d4..ce6abdd 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 32b7db1..278a8f2 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -65,7 +65,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = - getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF); // Stop early if there is no room to grow. if (NewRC == OldRC) diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 89ac6a8..7a3c80b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -209,6 +209,11 @@ static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched); +static cl::opt<bool> EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); @@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); // Initialize the context of the pass. 
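With the hunk above, an explicit -enable-misched occurrence on the command line
now overrides the subtarget's enableMachineScheduler() hook, and the hook only
decides when the flag is left unset. Illustrative usage (assumed, not part of
the patch):

  llc -enable-misched=false test.ll   # force the pass off for any subtarget
  llc test.ll                         # defer to enableMachineScheduler()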
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index bdb094f..991241e 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -397,7 +397,7 @@ void MachineVerifier::report(const char *msg, assert(MO); report(msg, MO->getParent()); errs() << "- operand " << MONum << ": "; - MO->print(errs(), TM); + MO->print(errs(), TRI); errs() << "\n"; } @@ -739,7 +739,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { if (!isUInt<5>(MI->getOperand(1).getImm())) report("Unknown asm flags", &MI->getOperand(1), 1); - assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed"); unsigned OpNo = InlineAsm::MIOp_FirstOperand; unsigned NumOps; @@ -927,7 +927,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { const TargetRegisterClass *SuperRC = - TRI->getLargestLegalSuperClass(RC); + TRI->getLargestLegalSuperClass(RC, *MF); if (!SuperRC) { report("No largest legal super class exists.", MO, MONum); return; @@ -1573,7 +1573,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!hasRead) { // When tracking subregister liveness, the main range must start new // values on partial register writes, even if there is no read. - if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) { + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); errs() << S << " in " << LR << '\n'; @@ -1649,40 +1650,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - verifyLiveRange(LI, LI.reg); - unsigned Reg = LI.reg; - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); - for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); - Mask |= SR.LaneMask; - verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); - } - } else if (LI.hasSubRanges()) { - report("subregister liveness only allowed for virtual registers", MF, LI); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + verifyLiveRange(LI, Reg); + + unsigned Mask = 0; + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) + report("Lane masks of sub ranges overlap in live interval", MF, LI); + if ((SR.LaneMask & ~MaxMask) != 0) + report("Subrange lanemask is invalid", MF, LI); + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) + report("A Subrange is not covered by the main range", MF, LI); } // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); - for (unsigned comp = 0; comp != NumComp; ++comp) { - errs() << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - errs() << ' ' << (*I)->id; - errs() << '\n'; - } + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + errs() << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + errs() << ' ' << (*I)->id; + errs() << '\n'; } } } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index def2e3d..d514190 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), cl::Hidden, cl::desc("Split all critical edges during " "PHI elimination")); +static cl::opt<bool> NoPhiElimLiveOutEarlyExit( + "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden, + cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true.")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) + bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB); + if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; - - DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" - << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() - << ": " << *BBI); + if (ShouldSplit) { + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + } // If Reg is not live-in to MBB, it means it must be live-in to some // other PreMBB successor, and we can avoid the interference by splitting @@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; + ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB); // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { @@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Split unless this edge is entering CurLoop from an outer loop. 
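      // Editorial gloss: that is, split when PreMBB lies in a loop that does
      // not enclose MBB (a genuine exiting edge), but leave alone edges that
      // merely descend from an outer loop into this one.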
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); } - if (!ShouldSplit) + if (!ShouldSplit && !SplitAllCriticalEdges) continue; if (!PreMBB->SplitCriticalEdge(&MBB, this)) { DEBUG(dbgs() << "Failed to split critical edge.\n"); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 272d068..c128414 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -23,8 +23,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -55,9 +54,6 @@ static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, static cl::opt<cl::boolOrDefault> OptimizeRegAlloc("optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); -static cl::opt<cl::boolOrDefault> -EnableMachineSched("enable-misched", - cl::desc("Enable the machine instruction scheduling pass.")); static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -116,28 +112,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, return PassID; } -/// Allow Pass selection to be overriden by command line options. This supports -/// flags with ternary conditions. TargetID is passed through by default. The -/// pass is suppressed when the option is false. When the option is true, the -/// StandardID is selected if the target provides no default. -static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, - cl::boolOrDefault Override, - AnalysisID StandardID) { - switch (Override) { - case cl::BOU_UNSET: - return TargetID; - case cl::BOU_TRUE: - if (TargetID.isValid()) - return TargetID; - if (StandardID == nullptr) - report_fatal_error("Target cannot enable pass"); - return StandardID; - case cl::BOU_FALSE: - return IdentifyingPassPtr(); - } - llvm_unreachable("Invalid command line option state"); -} - /// Allow standard passes to be disabled by the command line, regardless of who /// is adding the pass. /// @@ -182,9 +156,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &MachineCSEID) return applyDisable(TargetID, DisableMachineCSE); - if (StandardID == &MachineSchedulerID) - return applyOverride(TargetID, EnableMachineSched, StandardID); - if (StandardID == &TargetPassConfig::PostRAMachineLICMID) return applyDisable(TargetID, DisablePostRAMachineLICM); @@ -249,11 +220,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); - - // Temporarily disable experimental passes. - const TargetSubtargetInfo &ST = *TM->getSubtargetImpl(); - if (!ST.useMachineScheduler()) - disablePass(&MachineSchedulerID); } /// Insert InsertedPassID pass after TargetPassID. @@ -409,10 +375,8 @@ void TargetPassConfig::addIRPasses() { // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) { + if (!DisableVerify) addPass(createVerifierPass()); - addPass(createDebugInfoVerifierPass()); - } // Run loop strength reduction before anything else. 
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { @@ -455,7 +419,11 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::WinEH: + // We support using both GCC-style and MSVC-style exceptions on Windows, so + // add both preparation passes. Each pass will only actually run if it + // recognizes the personality function. addPass(createWinEHPass(TM)); + addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -479,12 +447,6 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); - // Need to verify DebugInfo *before* creating the stack protector analysis. - // It's a function pass, and verifying between it and its users causes a - // crash. - if (!DisableVerify) - addPass(createDebugInfoVerifierPass()); - addPass(createStackProtectorPass(TM)); if (PrintISelInput) diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 283d1f2..ebe05e3 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -76,6 +76,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (ExtendLife && !ExtendedUses.empty()) // Extend the liveness of the extension result. - std::copy(ExtendedUses.begin(), ExtendedUses.end(), - std::back_inserter(Uses)); + Uses.append(ExtendedUses.begin(), ExtendedUses.end()); // Now replace all uses. bool Changed = false; @@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { // => v0 = COPY v1 // Currently we haven't seen motivating example for that and we // want to avoid untested code. - NumRewrittenCopies += Changed == true; + NumRewrittenCopies += Changed; return Changed; } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 6d29b98..e073e6a 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/DiagnosticInfo.h" @@ -48,6 +48,53 @@ using namespace llvm; #define DEBUG_TYPE "pei" +namespace { +class PEI : public MachineFunctionPass { +public: + static char ID; + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. 
+ unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Entry and return blocks of the current function. + MachineBasicBlock *EntryBlock; + SmallVector<MachineBasicBlock *, 4> ReturnBlocks; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. + bool FrameIndexVirtualScavenging; + + void calculateSets(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); + void calculateCalleeSavedRegisters(MachineFunction &Fn); + void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); + void scavengeFrameVirtualRegs(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); + + // Convenience for recognizing return blocks. + bool isReturnBlock(MachineBasicBlock *MBB); +}; +} // namespace + char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; @@ -810,17 +857,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } - // Frame allocations are target independent. Simply swap the index with - // the offset. - if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) { - assert(TFI->hasFP(Fn) && "frame alloc requires FP"); - MachineOperand &FI = MI->getOperand(i); - unsigned Reg; - int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg); - FI.ChangeToImmediate(FrameOffset); - continue; - } - // Some instructions (e.g. inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h deleted file mode 100644 index f88b8ef..0000000 --- a/lib/CodeGen/PrologEpilogInserter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass is responsible for finalizing the functions frame layout, saving -// callee saved registers, and for emitting prolog & epilog code for the -// function. -// -// This pass must be run after register allocation. After this pass is -// executed, it is illegal to construct MO_FrameIndex operands. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H -#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - class RegScavenger; - class MachineBasicBlock; - - class PEI : public MachineFunctionPass { - public: - static char ID; - PEI() : MachineFunctionPass(ID) { - initializePEIPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract - /// frame indexes with appropriate references. 
- /// - bool runOnMachineFunction(MachineFunction &Fn) override; - - private: - RegScavenger *RS; - - // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved - // stack frame indexes. - unsigned MinCSFrameIndex, MaxCSFrameIndex; - - // Entry and return blocks of the current function. - MachineBasicBlock* EntryBlock; - SmallVector<MachineBasicBlock*, 4> ReturnBlocks; - - // Flag to control whether to use the register scavenger to resolve - // frame index materialization registers. Set according to - // TRI->requiresFrameIndexScavenging() for the curren function. - bool FrameIndexVirtualScavenging; - - void calculateSets(MachineFunction &Fn); - void calculateCallsInformation(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); - void insertCSRSpillsAndRestores(MachineFunction &Fn); - void calculateFrameObjectOffsets(MachineFunction &Fn); - void replaceFrameIndices(MachineFunction &Fn); - void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, - int &SPAdj); - void scavengeFrameVirtualRegs(MachineFunction &Fn); - void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. - bool isReturnBlock(MachineBasicBlock* MBB); - }; -} // End llvm namespace -#endif diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 6b346f4..16ff48e 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -27,6 +27,7 @@ #endif #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index edc3294..e94f1bb 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1554,7 +1554,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. 
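The updated getLargestLegalSuperClass call sites here and in the hunks below
imply the TargetRegisterInfo hook gained a MachineFunction parameter so targets
can tailor the answer per function. A sketch of the declaration these call
sites suggest (assumed; the header change is not shown in this diff):

  virtual const TargetRegisterClass *
  getLargestLegalSuperClass(const TargetRegisterClass *RC,
                            const MachineFunction &MF) const;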
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 77a42b3..eeff73d 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -178,8 +178,40 @@ class Interference : public PBQPRAConstraint {
 private:
 
   typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
-  typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
-  typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+  typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
+  typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+  typedef DenseSet<IKey> DisjointAllowedRegsCache;
+  typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
+  typedef DenseSet<IEdgeKey> IEdgeCache;
+
+  bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+                               PBQPRAGraph::NodeId MId,
+                               const DisjointAllowedRegsCache &D) const {
+    const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+    const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+    if (NRegs == MRegs)
+      return false;
+
+    if (NRegs < MRegs)
+      return D.count(IKey(NRegs, MRegs)) > 0;
+
+    return D.count(IKey(MRegs, NRegs)) > 0;
+  }
+
+  void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+                              PBQPRAGraph::NodeId MId,
+                              DisjointAllowedRegsCache &D) {
+    const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+    const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+    assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
+
+    if (NRegs < MRegs)
+      D.insert(IKey(NRegs, MRegs));
+    else
+      D.insert(IKey(MRegs, NRegs));
+  }
 
   // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
   // for the fast interference graph construction algorithm. The last is there
@@ -247,6 +279,13 @@ public:
     // and uniquing them.
     IMatrixCache C;
 
+    // Finding an edge is expensive in the worst case (O(max_clique(G))). So
+    // locally cache edges we have already seen.
+    IEdgeCache EC;
+
+    // Cache known disjoint allowed register pairs
+    DisjointAllowedRegsCache D;
+
     typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
     typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
                                 decltype(&lowestStartPoint)> IntervalQueue;
@@ -290,14 +329,21 @@ public:
       for (const auto &A : Active) {
         PBQP::GraphBase::NodeId MId = getNodeId(A);
 
+        // Do not add an edge when the nodes' allowed registers do not
+        // intersect: there is obviously no interference.
+        if (haveDisjointAllowedRegs(G, NId, MId, D))
+          continue;
+
         // Check that we haven't already added this edge
-        // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
-        // It might be better to replace this with a local bit-matrix.
-        if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+        IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+        if (EC.count(EK))
           continue;
 
         // This is a new edge - add it to the graph.
-        createInterferenceEdge(G, NId, MId, C);
+        if (!createInterferenceEdge(G, NId, MId, C))
+          setDisjointAllowedRegs(G, NId, MId, D);
+        else
+          EC.insert(EK);
       }
 
       // Finally, add Cur to the Active set.
@@ -307,35 +353,48 @@ public:
 
 private:
 
-  void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
-                              PBQPRAGraph::NodeId MId, IMatrixCache &C) {
+  // Create an Interference edge and add it to the graph, unless it is
+  // a null matrix, meaning the nodes' allowed registers do not have any
+  // interference. This case occurs frequently between integer and floating
+  // point registers, for example.
+  // Returns true iff both nodes interfere.
+  bool createInterferenceEdge(PBQPRAGraph &G,
+                              PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+                              IMatrixCache &C) {
 
     const TargetRegisterInfo &TRI =
         *G.getMetadata().MF.getSubtarget().getRegisterInfo();
-
     const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
     const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
 
     // Try looking the edge costs up in the IMatrixCache first.
-    IMatrixKey K(&NRegs, &MRegs);
+    IKey K(&NRegs, &MRegs);
     IMatrixCache::iterator I = C.find(K);
     if (I != C.end()) {
       G.addEdgeBypassingCostAllocator(NId, MId, I->second);
-      return;
+      return true;
     }
 
     PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+    bool NodesInterfere = false;
     for (unsigned I = 0; I != NRegs.size(); ++I) {
       unsigned PRegN = NRegs[I];
       for (unsigned J = 0; J != MRegs.size(); ++J) {
         unsigned PRegM = MRegs[J];
-        if (TRI.regsOverlap(PRegN, PRegM))
+        if (TRI.regsOverlap(PRegN, PRegM)) {
           M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+          NodesInterfere = true;
+        }
       }
     }
 
+    if (!NodesInterfere)
+      return false;
+
     PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
     C[K] = G.getEdgeCostsPtr(EId);
+
+    return true;
   }
 };
 
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index ab33672..178fa18 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
     RCI.NumRegs = StressRA;
 
   // Check if RC is a proper sub-class.
-  if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+  if (const TargetRegisterClass *Super =
+          TRI->getLargestLegalSuperClass(RC, *MF))
     if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
       RCI.ProperSubClass = true;
 
@@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
   }
   compute(RC);
   unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
-  return TRI->getRegPressureSetLimit(Idx) -
-         TRI->getRegClassWeight(RC).RegWeight * NReserved;
+  return TRI->getRegPressureSetLimit(*MF, Idx) -
+         TRI->getRegClassWeight(RC).RegWeight * NReserved;
 }
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1e4cfe8..9e3cf41 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -58,6 +58,10 @@ EnableJoining("join-liveintervals",
               cl::desc("Coalesce copies (default=true)"),
               cl::init(true));
 
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+                                     cl::desc("Apply the terminal rule"),
+                                     cl::init(false));
+
 /// Temporary flag to test critical edge unsplitting.
 static cl::opt<bool>
 EnableJoinSplits("join-splitedges",
@@ -160,12 +164,14 @@ namespace {
     /// LaneMask are split as necessary. @p LaneMask are the lanes that
    /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
     /// lanemasks already adjusted to the coalesced register.
-    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+    /// @returns false if live range conflicts couldn't be resolved.
+    bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
                            unsigned LaneMask, CoalescerPair &CP);
 
     /// Join the liveranges of two subregisters. Joins @p RRange into
     /// @p LRange, @p RRange may be invalid afterwards.
-    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+    /// @returns false if live range conflicts couldn't be resolved.
+    bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                           unsigned LaneMask, const CoalescerPair &CP);
 
     /// We found a non-trivially-coalescable copy. If the source value number is
@@ -204,6 +210,20 @@ namespace {
     /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
     bool eliminateUndefCopy(MachineInstr *CopyMI);
 
+    /// Check whether or not we should apply the terminal rule on the
+    /// destination (Dst) of \p Copy.
+    /// When the terminal rule applies, Copy is not profitable to
+    /// coalesce.
+    /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+    /// at least one interference (Dst, Dst2). If Dst is terminal, the
+    /// terminal rule consists in checking that at least one of the
+    /// interfering nodes, say Dst2, has an affinity of equal or greater
+    /// weight with Src.
+    /// In that case, Dst and Dst2 cannot both be coalesced with Src.
+    /// Since Dst2 exposes more coalescing opportunities than Dst, we can
+    /// drop \p Copy.
+    bool applyTerminalRule(const MachineInstr &Copy) const;
+
   public:
     static char ID; ///< Class identification, replacement for typeinfo
     RegisterCoalescer() : MachineFunctionPass(ID) {
@@ -1143,7 +1163,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
 
     // A subreg use of a partially undef (super) register may be a complete
     // undef use now and then has to be marked that way.
-    if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) {
+    if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
       if (!DstInt->hasSubRanges()) {
         BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
         unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
@@ -1756,6 +1776,9 @@ public:
   void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                    SmallVectorImpl<unsigned> &ShrinkRegs);
 
+  /// Remove liverange defs at places where implicit defs will be removed.
+  void removeImplicitDefs();
+
   /// Get the value assignments suitable for passing to LiveInterval::join.
   const int *getAssignments() const { return Assignments.data(); }
 };
@@ -1856,7 +1879,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   assert(DefMI != nullptr);
   if (SubRangeJoin) {
     // We don't care about the lanes when joining subregister ranges.
-    V.ValidLanes = V.WriteLanes = 1;
+    V.WriteLanes = V.ValidLanes = 1;
+    if (DefMI->isImplicitDef()) {
+      V.ValidLanes = 0;
+      V.ErasableImplicitDef = true;
+    }
   } else {
     bool Redef = false;
     V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
@@ -2339,6 +2366,18 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
   LI.removeEmptySubRanges();
 }
 
+void JoinVals::removeImplicitDefs() {
+  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+    Val &V = Vals[i];
+    if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
+      continue;
+
+    VNInfo *VNI = LR.getValNumInfo(i);
+    VNI->markUnused();
+    LR.removeValNo(VNI);
+  }
+}
+
 void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                            SmallVectorImpl<unsigned> &ShrinkRegs) {
   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
@@ -2382,7 +2421,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
   }
 }
 
-void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                                          unsigned LaneMask,
                                          const CoalescerPair &CP) {
   SmallVector<VNInfo*, 16> NewVNInfo;
@@ -2392,12 +2431,19 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                    NewVNInfo, CP, LIS, TRI, true, true);
 
   // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
-  // Conflicts should already be resolved so the mapping/resolution should
-  // always succeed.
-  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
-    llvm_unreachable("Can't join subrange although main ranges are compatible");
-  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
-    llvm_unreachable("Can't join subrange although main ranges are compatible");
+  // We should be able to resolve all conflicts here as we could successfully
+  // do it on the main range already. There is, however, a problem when
+  // multiple ranges get mapped to the "overflow" lane mask bit, which
+  // creates unexpected interferences.
+  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
+    DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+    return false;
+  }
+  if (!LHSVals.resolveConflicts(RHSVals) ||
+      !RHSVals.resolveConflicts(LHSVals)) {
+    DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+    return false;
+  }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting
   // value mappings, so we need to remove any live ranges that overlap a
@@ -2407,6 +2453,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
   LHSVals.pruneValues(RHSVals, EndPoints, false);
   RHSVals.pruneValues(LHSVals, EndPoints, false);
 
+  LHSVals.removeImplicitDefs();
+  RHSVals.removeImplicitDefs();
+
   LRange.verify();
   RRange.verify();
 
@@ -2416,16 +2465,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
 
   DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
   if (EndPoints.empty())
-    return;
+    return true;
 
   // Recompute the parts of the live range we had to remove because of
   // CR_Replace conflicts.
   DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
                << " points: " << LRange << '\n');
   LIS->extendToIndices(LRange, EndPoints);
+  return true;
 }
 
-void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
                                           const LiveRange &ToMerge,
                                           unsigned LaneMask, CoalescerPair &CP) {
   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
@@ -2453,7 +2503,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
       CommonRange = &R;
     }
     LiveRange RangeCopy(ToMerge, Allocator);
-    joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
+    if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
+      return false;
     LaneMask &= ~RMask;
   }
 
@@ -2461,13 +2512,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
     DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
     LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
   }
+  return true;
 }
 
 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   SmallVector<VNInfo*, 16> NewVNInfo;
   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
   LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
-  bool TrackSubRegLiveness = MRI->tracksSubRegLiveness();
+  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
   JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
                    TRI, false, TrackSubRegLiveness);
   JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
@@ -2511,22 +2563,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
 
     // Determine lanemasks of RHS in the coalesced register and merge subranges.
     unsigned SrcIdx = CP.getSrcIdx();
+    bool Abort = false;
     if (!RHS.hasSubRanges()) {
       unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
                                   : TRI->getSubRegIndexLaneMask(SrcIdx);
-      mergeSubRangeInto(LHS, RHS, Mask, CP);
+      if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
+        Abort = true;
     } else {
       // Pair up subranges and merge.
       for (LiveInterval::SubRange &R : RHS.subranges()) {
         unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
-        mergeSubRangeInto(LHS, R, Mask, CP);
+        if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
+          Abort = true;
+          break;
+        }
       }
     }
+    if (Abort) {
+      // This shouldn't have happened :-(
+      // However, we are aware of at least one existing problem where we
+      // can't merge subranges when multiple ranges end up in the
+      // "overflow bit" 32. As a workaround we drop all subregister ranges,
+      // which means we lose some precision but are back to a well-defined
+      // state.
+      assert((CP.getNewRC()->getLaneMask() & 0x80000000u)
+             && "SubRange merge should only fail when merging into bit 32.");
+      DEBUG(dbgs() << "\tSubrange join aborted!\n");
+      LHS.clearSubRanges();
+      RHS.clearSubRanges();
+    } else {
+      DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
 
-    DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
-
-    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
-    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+      LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+      RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+    }
   }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2645,6 +2715,58 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
   return Progress;
 }
 
+/// Check if DstReg is a terminal node.
+/// I.e., it does not have any affinity other than \p Copy.
+static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+                          const MachineRegisterInfo *MRI) {
+  assert(Copy.isCopyLike());
+  // Check if the destination of this copy has any other affinity.
+  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
+    if (&MI != &Copy && MI.isCopyLike())
+      return false;
+  return true;
+}
+
+bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
+  assert(Copy.isCopyLike());
+  if (!UseTerminalRule)
+    return false;
+  // Check if the destination of this copy has any other affinity.
+  unsigned DstReg = Copy.getOperand(0).getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      !isTerminalReg(DstReg, Copy, MRI))
+    return false;
+
+  // DstReg is a terminal node. Check if it interferes with any other
+  // copy involving SrcReg.
+  unsigned SrcReg = Copy.getOperand(1).getReg();
+  const MachineBasicBlock *OrigBB = Copy.getParent();
+  const LiveInterval &DstLI = LIS->getInterval(DstReg);
+  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
+    // Technically we should check if the weight of the new copy is
+    // interesting compared to the other one and update the weight
+    // of the copies accordingly. However, this would only work if
+    // we would gather all the copies first then coalesce, whereas
+    // right now we interleave both actions.
+    // For now, just consider the copies that are in the same block.
+    if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+      continue;
+    unsigned OtherReg = MI.getOperand(0).getReg();
+    if (OtherReg == SrcReg)
+      OtherReg = MI.getOperand(1).getReg();
+    // Check if OtherReg is a non-terminal.
+    if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+        isTerminalReg(OtherReg, MI, MRI))
+      continue;
+    // Check that OtherReg interferes with DstReg.
+    if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+      DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+      return true;
+    }
+  }
+  return false;
+}
+
 void
 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
   DEBUG(dbgs() << MBB->getName() << ":\n");
@@ -2659,7 +2781,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
     // cmp+jmp macro fusion.
     for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
          MII != E; ++MII) {
-      if (!MII->isCopyLike())
+      if (!MII->isCopyLike() || applyTerminalRule(*MII))
        continue;
      if (isLocalCopy(&(*MII), LIS))
        LocalWorkList.push_back(&(*MII));
@@ -2670,7 +2792,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
  else {
     for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
          MII != E; ++MII)
-      if (MII->isCopyLike())
+      if (MII->isCopyLike() && !applyTerminalRule(*MII))
         WorkList.push_back(MII);
   }
   // Try coalescing the collected copies immediately, and remove the nulls.
@@ -2741,7 +2863,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   AA = &getAnalysis<AliasAnalysis>();
   Loops = &getAnalysis<MachineLoopInfo>();
   if (EnableGlobalCopies == cl::BOU_UNSET)
-    JoinGlobalCopies = STI.useMachineScheduler();
+    JoinGlobalCopies = STI.enableJoinGlobalCopies();
   else
     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
 
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 9925efb..3634103 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
   return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
 }
 
+namespace {
 /// Collect this instruction's unique uses and defs into SmallVectors for
 /// processing defs and uses in order.
/// @@ -354,6 +355,7 @@ protected: } } }; +} // namespace /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 78bfd23..17dd729 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -96,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static void getUnderlyingObjects(const Value *V, - SmallVectorImpl<Value *> &Objects) { + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL) { SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 4> Working(1, V); do { V = Working.pop_back_val(); SmallVector<Value *, 4> Objs; - GetUnderlyingObjects(const_cast<Value *>(V), Objs); + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { @@ -132,7 +133,8 @@ UnderlyingObjectsVector; /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, - UnderlyingObjectsVector &Objects) { + UnderlyingObjectsVector &Objects, + const DataLayout &DL) { if (!MI->hasOneMemOperand() || (!(*MI->memoperands_begin())->getValue() && !(*MI->memoperands_begin())->getPseudoValue()) || @@ -156,7 +158,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); + getUnderlyingObjects(V, Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { @@ -468,7 +470,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { // This MI might have either incomplete info, or known to be unsafe // to deal with (i.e. volatile object). static inline bool isUnsafeMemoryObject(MachineInstr *MI, - const MachineFrameInfo *MFI) { + const MachineFrameInfo *MFI, + const DataLayout &DL) { if (!MI || MI->memoperands_empty()) return true; // We purposefully do no check for hasOneMemOperand() here @@ -491,7 +494,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return true; SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs); + getUnderlyingObjects(V, Objs, DL); for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { // Does this pointer refer to a distinct and identifiable object? @@ -508,7 +511,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, /// these two MIs be reordered during scheduling from memory dependency /// point of view. 
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, - MachineInstr *MIa, + const DataLayout &DL, MachineInstr *MIa, MachineInstr *MIb) { const MachineFunction *MF = MIa->getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -527,7 +530,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) return true; - if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI)) + if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) return true; // If we are dealing with two "normal" loads, we do not need an edge @@ -579,10 +582,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// This recursive function iterates over chain deps of SUb looking for /// "latest" node that needs a chain edge to SUa. -static unsigned -iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, - SmallPtrSetImpl<const SUnit*> &Visited) { +static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, SUnit *SUb, + SUnit *ExitSU, unsigned *Depth, + SmallPtrSetImpl<const SUnit *> &Visited) { if (!SUa || !SUb || SUb == ExitSU) return *Depth; @@ -607,7 +610,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, // add that edge to the predecessors chain of SUb, // and stop descending. if (*Depth > 200 || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SUb->addPred(SDep(SUa, SDep::MayAliasMem)); return *Depth; } @@ -617,7 +620,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); I != E; ++I) if (I->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); + iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); return *Depth; } @@ -626,7 +629,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, /// checks whether SU can be aliasing any node dominated /// by it. static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList, + const DataLayout &DL, SUnit *SU, SUnit *ExitSU, + std::set<SUnit *> &CheckList, unsigned LatencyToLoad) { if (!SU) return; @@ -638,7 +642,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, I != IE; ++I) { if (SU == *I) continue; - if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { SDep Dep(SU, SDep::MayAliasMem); Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); (*I)->addPred(Dep); @@ -649,22 +653,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), JE = (*I)->Succs.end(); J != JE; ++J) if (J->isNormalMemoryOrBarrier()) - iterateChainSucc (AA, MFI, SU, J->getSUnit(), - ExitSU, &Depth, Visited); + iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, + Visited); } } /// Check whether two objects need a chain edge, if so, add it /// otherwise remember the rejected SU. 
-static inline -void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, - SUnit *SUa, SUnit *SUb, - std::set<SUnit *> &RejectList, - unsigned TrueMemOrderLatency = 0, - bool isNormalMemory = false) { +static inline void addChainDependency(AliasAnalysis *AA, + const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, + SUnit *SUb, std::set<SUnit *> &RejectList, + unsigned TrueMemOrderLatency = 0, + bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. - if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -883,7 +887,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -896,25 +900,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, - ChainLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); } for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - TrueMemOrderLatency); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -928,7 +934,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -952,8 +958,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? 
AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, - 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. if (!AAForDep) @@ -977,7 +983,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -986,13 +993,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); } - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; @@ -1000,7 +1009,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs); + getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1008,8 +1017,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -1032,18 +1041,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, SU, I->second[i], - RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); + adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. 
if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } @@ -1211,7 +1222,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << "<exit>"; else - SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); + SU->getInstr()->print(oss, /*SkipOpers=*/true); return oss.str(); } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6129401..a1c84c5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -246,10 +246,11 @@ namespace { SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); - SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -302,6 +303,7 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); @@ -713,6 +715,22 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { return nullptr; } +static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + // \brief Returns the SDNode if it is a constant splat BuildVector or constant // int. static ConstantSDNode *isConstOrConstSplat(SDValue N) { @@ -1180,11 +1198,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; - // Early exit if this basic block is in an optnone function. - if (DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::OptimizeNone)) - return; - // Add all the dag nodes to the worklist. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) @@ -1369,6 +1382,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); case ISD::MLOAD: return visitMLOAD(N); case ISD::MSTORE: return visitMSTORE(N); @@ -2685,6 +2699,109 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { return SDValue(); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. 
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && + cast<ConstantSDNode>(RR)->isAllOnesValue()) || + (cast<ConstantSDNode>(LR)->isAllOnesValue() && + cast<ConstantSDNode>(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. 
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDValue NewAdd = + DAG.getNode(ISD::ADD, SDLoc(N0), VT, + N0.getOperand(0), DAG.getConstant(ADDC, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! + return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2716,9 +2833,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N0; } - // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -2808,9 +2922,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); - Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) - Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (SplatBitSize % BitWidth == 0) { + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } } } @@ -2863,118 +2981,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } - // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) - SDValue LL, LR, RL, RR, CC0, CC1; - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - } - // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) - if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && - Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && - cast<ConstantSDNode>(RR)->isAllOnesValue()) || - (cast<ConstantSDNode>(LR)->isAllOnesValue() && - cast<ConstantSDNode>(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - - // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } - - // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) - // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - - // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) @@ -3046,33 +3052,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { - if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal - // immediate for an add, but it is legal if its top c2 bits are set, - // transform the ADD so the immediate doesn't need to be materialized - // in a register. - if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); - if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { - ADDC |= Mask; - if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); - CombineTo(N0.getNode(), NewAdd); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - } - } - } + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. 
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -3338,6 +3371,98 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast<ConstantSDNode>(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast<ConstantSDNode>(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X, + DAG.getConstant(LHSMask | RHSMask, VT)); + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. 
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3425,12 +3550,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); - } // fold (or c1, c2) -> c1|c2 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -3449,6 +3568,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); if (BSwap.getNode()) @@ -3474,91 +3596,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return SDValue(); } } - // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) - SDValue LL, LR, RL, RR, CC0, CC1; - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); - ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); - - if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) - // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast<ConstantSDNode>(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) - // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast<ConstantSDNode>(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); if (Tmp.getNode()) return Tmp; } - // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && - // Don't increase # computations. 
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - // We can only do this xform if we know that bits from X that are set in C2 - // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); - } - } - - // (or (and X, M), (and X, N)) -> (and X, (or M, N)) - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(0) == N1.getOperand(0) && - // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(1), N1.getOperand(1)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X); - } - // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3947,6 +3990,32 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) + if (N0.getOpcode() == ISD::SHL) + if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + if (N1C->isAllOnesValue() && ShlLHS->isOne()) + return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT), + N0.getOperand(1)); + // Simplify: xor (op x...), (op y...) 
-> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); @@ -4792,6 +4861,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } @@ -6440,7 +6572,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isa<ConstantSDNode>(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -7453,14 +7585,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. 
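The guarded rewrite that follows reassociates (fmul (fmul x, c1), c2) into a single multiply by a pre-folded constant; the first check exists because if the inner multiply's first operand were itself a constant, the rewritten node would match the same pattern again and the combiner could loop. A minimal standalone sketch of the arithmetic (plain C++, not the SelectionDAG API; the constants are an assumed, exactly-representable example):

    #include <cassert>

    // (x * c1) * c2 folds to x * (c1 * c2): one multiply by a pre-folded
    // constant. With these exactly-representable values the orders agree
    // bit-for-bit; the DAG fold applies under its own FP constraints.
    static double foldOnce(double x, double c1, double c2) {
      return x * (c1 * c2);
    }

    int main() {
      assert(foldOnce(3.0, 2.0, 4.0) == (3.0 * 2.0) * 4.0);
    }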
+ SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDLoc SL(N); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts); + } } } @@ -7821,8 +7962,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7874,8 +8014,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8033,7 +8172,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. @@ -8042,7 +8180,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the @@ -8117,14 +8255,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // Constant fold FNEG. 
- if (isa<ConstantFPSDNode>(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) @@ -8219,13 +8352,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // fold (fabs c1) -> fabs(c1) - if (isa<ConstantFPSDNode>(N0)) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -8941,7 +9069,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -9106,9 +9235,6 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - LoadedSlice(const LoadedSlice &LS) - : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. @@ -9855,6 +9981,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -9942,6 +10069,7 @@ struct BaseIndexOffset { return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); } }; +} // namespace bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, @@ -10575,11 +10703,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } @@ -11226,12 +11358,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - SDValue V = reduceBuildVecExtToExtBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - V = reduceBuildVecConvertToConvertBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -11352,7 +11482,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { // If the input vector is too large, try to split it. // We don't support having two input vectors that are too large. - if (VecIn2.getNode()) + // If the zero vector was used, we can not split the vector, + // since we'd need 3 inputs. 
+ if (UsesZeroVector || VecIn2.getNode()) return SDValue(); if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) @@ -11364,7 +11496,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); - UsesZeroVector = false; } else return SDValue(); } @@ -11465,14 +11596,12 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { unsigned NumElts = OpVT.getVectorNumElements(); if (ISD::UNDEF == Op.getOpcode()) - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(DAG.getUNDEF(MinVT)); + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); if (ISD::BUILD_VECTOR == Op.getOpcode()) { if (SVT.isFloatingPoint()) { assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(Op.getOperand(i)); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); } else { for (unsigned i = 0; i != NumElts; ++i) Opnds.push_back( @@ -11850,7 +11979,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); return NewBV; } } @@ -11872,6 +12001,81 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. 
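Both mask helpers used in this hunk are easy to model outside the DAG. A standalone sketch (plain C++; the real code uses a local lambda and ShuffleVectorSDNode::commuteMask) of mask scaling and mask commutation, with a small self-check:

    #include <cassert>
    #include <vector>

    // Scale a shuffle mask to a narrower element type: each wide lane expands
    // into 'Scale' consecutive narrow lanes; -1 (undef) stays undef.
    static std::vector<int> scaleShuffleMask(const std::vector<int> &Mask,
                                             int Scale) {
      std::vector<int> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    }

    // Commute a mask: after swapping the two input vectors, indices into the
    // first half refer to the second half and vice versa.
    static void commuteMask(std::vector<int> &Mask) {
      int NumElts = (int)Mask.size();
      for (int &M : Mask) {
        if (M < 0) continue;
        M = M < NumElts ? M + NumElts : M - NumElts;
      }
    }

    int main() {
      std::vector<int> M = {0, 3, -1, 2}; // a 4-lane mask
      assert(scaleShuffleMask(M, 2) ==
             (std::vector<int>{0, 1, 6, 7, -1, -1, 4, 5})); // 8-lane form
      commuteMask(M);
      assert(M == (std::vector<int>{4, 7, -1, 6}));
    }

Trying the merged mask under both commutations, as the hunk does, matters because a target may accept a mask only with the operands in one particular order.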
+ SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) @@ -11981,16 +12185,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Avoid introducing shuffles with illegal mask. if (!TLI.isShuffleMaskLegal(Mask, VT)) { - // Compute the commuted shuffle mask and test again. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + ShuffleVectorSDNode::commuteMask(Mask); if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); @@ -12010,6 +12205,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N2 = N->getOperand(2); @@ -12043,44 +12266,51 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<int, 8> Indices; - unsigned NumElts = RHS.getNumOperands(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (!isa<ConstantSDNode>(Elt)) - return SDValue(); + SDLoc dl(N); - if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) - Indices.push_back(i); - else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. 
+ if (LegalOperations) + return SDValue(); + + if (N->getOpcode() != ISD::AND) + return SDValue(); - // Let's see if the target supports this vector_shuffle and make sure - // we're not running after operation legalization where it may have - // custom lowered the vector shuffles. - EVT RVT = RHS.getValueType(); - if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT)) + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); + + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector<int, 8> Indices; + unsigned NumElts = RHS.getNumOperands(); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (!isa<ConstantSDNode>(Elt)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + Indices.push_back(i); + else if (cast<ConstantSDNode>(Elt)->isNullValue()) + Indices.push_back(NumElts+i); + else + return SDValue(); } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + EVT EltVT = RVT.getVectorElementType(); + SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } return SDValue(); @@ -12093,8 +12323,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Shuffle = XformToShuffleWithZero(N); - if (Shuffle.getNode()) return Shuffle; + + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. @@ -12172,38 +12403,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// Visit a binary vector operation, like FABS/FNEG. -SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVUnaryOp only works on vectors!"); - - SDValue N0 = N->getOperand(0); - - if (N0.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); - - // Operand is a BUILD_VECTOR node, see if we can constant fold it. 
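The restructured XformToShuffleWithZero above rests on a simple identity: an AND with a build_vector whose lanes are each all-ones or all-zero is a shuffle that picks every lane from either the input or a zero vector. A runnable sketch of that equivalence on a hypothetical v4i32 (plain C++, not the DAG API):

    #include <array>
    #include <cassert>
    #include <cstdint>

    // (and x, <-1, 0, -1, 0>) keeps lane i of x where the mask lane is
    // all-ones and yields zero elsewhere -- i.e. shuffle <0, 5, 2, 7>, where
    // indices >= 4 select from the zero vector (index NumElts + i).
    int main() {
      std::array<uint32_t, 4> X = {11, 22, 33, 44}, Zero = {0, 0, 0, 0};
      std::array<uint32_t, 4> Mask = {0xFFFFFFFFu, 0, 0xFFFFFFFFu, 0};
      std::array<int, 4> ShufIdx = {0, 5, 2, 7};
      for (int i = 0; i != 4; ++i) {
        uint32_t ByAnd = X[i] & Mask[i];
        uint32_t ByShuf = ShufIdx[i] < 4 ? X[ShufIdx[i]] : Zero[ShufIdx[i] - 4];
        assert(ByAnd == ByShuf);
      }
    }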
- SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { - SDValue Op = N0.getOperand(i); - if (Op.getOpcode() != ISD::UNDEF && - Op.getOpcode() != ISD::ConstantFP) - break; - EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() != N0.getNumOperands()) - return SDValue(); - - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); -} - SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 1df4a1d..223a149 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -62,6 +62,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -497,7 +498,7 @@ bool FastISel::selectGetElementPtr(const User *I) { OI != E; ++OI) { const Value *Idx = *OI; if (auto *StTy = dyn_cast<StructType>(Ty)) { - unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); @@ -518,8 +519,8 @@ bool FastISel::selectGetElementPtr(const User *I) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += - DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); + uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); + TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; if (TotalOffs >= MaxOffs) { N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. @@ -801,7 +802,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return false; // Push the register mask info. - Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); // Add scratch registers as implicit def and early clobber. 
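In the FastISel::selectGetElementPtr hunk above, the GEP index is now routed through sextOrTrunc(64) before scaling, so indices narrower or wider than 64 bits contribute a correctly signed byte offset. A small sketch of why the sign extension matters (plain C++; an i8 index of -1 and an 8-byte element are an assumed example):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t RawIdx = -1;              // the GEP index as an i8
      uint64_t EltSize = 8;            // bytes per element
      int64_t IdxN = (int64_t)RawIdx;  // the sextOrTrunc(64) step for an i8
      uint64_t TotalOffs = EltSize * (uint64_t)IdxN;
      assert((int64_t)TotalOffs == -8); // one element backwards, as intended
    }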
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 7e72dc6..291b583 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 61c0a6f..ece38f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1442,13 +1442,27 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + SDValue NewLoad; + if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, false, 0); - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - MachinePointerInfo(), - Vec.getValueType().getVectorElementType(), - false, false, false, 0); + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), false, false, false, 0); + else + NewLoad = DAG.getExtLoad( + ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), false, false, false, 0); + + // Replace the chain going out of the store, by the one out of the load. + DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); + + // We introduced a cycle though, so update the loads operands, making sure + // to use the original store's chain as an incoming chain. 
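The chain fixup continues just below; as background, ExpandExtractFromVectorThroughStack lowers the extract into a spill of the whole vector plus an element-sized load at slot + index * sizeof(element). This plain-C++ model mimics the data movement only (the chain rewiring itself has no scalar analogue):

    #include <cassert>
    #include <cstring>

    int main() {
      float Vec[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      unsigned Idx = 2;
      alignas(16) unsigned char Slot[sizeof Vec];  // the stack temporary
      std::memcpy(Slot, Vec, sizeof Vec);          // the store
      float Elt;
      std::memcpy(&Elt, Slot + Idx * sizeof(float), sizeof Elt); // the load
      assert(Elt == 3.0f);
    }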
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(), + NewLoad->op_end()); + NewLoadOperands[0] = Ch; + NewLoad = + SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); + return NewLoad; } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -2817,132 +2831,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case 
MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - case ISD::ATOMIC_LOAD_MAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMAX: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break; - } - break; - case ISD::ATOMIC_LOAD_MIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break; - } - break; - case ISD::ATOMIC_LOAD_UMIN: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5507c70..25e80b9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1116,7 +1116,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ - assert(OpNo == 2 && "Only know how to promote the mask!"); SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); @@ -1127,7 +1126,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN if (!TLI.isTypeLegal(DataVT)) { if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + if (!TLI.isTypeLegal(MaskVT)) + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } else { @@ -1323,92 +1323,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair 
<SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC; - - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic intrinsic Expand!"); - case ISD::ATOMIC_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; - case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; - case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; - case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; - case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; - } - break; - case ISD::ATOMIC_CMP_SWAP: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; - case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; - case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; - case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; - case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; - } - break; - case ISD::ATOMIC_LOAD_ADD: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; - } - break; - case ISD::ATOMIC_LOAD_SUB: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; - } - break; - case ISD::ATOMIC_LOAD_AND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; - } - break; - case ISD::ATOMIC_LOAD_OR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; - } - break; - case ISD::ATOMIC_LOAD_XOR: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; - case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; - } - break; - case ISD::ATOMIC_LOAD_NAND: - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type for atomic!"); - case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; - case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; - case MVT::i32: LC = 
RTLIB::SYNC_FETCH_AND_NAND_4; break; - case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; - case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; - } - break; - } + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); } @@ -1417,12 +1333,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); + // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization + // split a vector shift, like this: <op1, op2> SHL <0, 2>. + if (!Amt) { + Lo = InL; + Hi = InH; + return; + } + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 63671f7..f7e4557 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2553,6 +2553,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + + // The input and output types often differ here, and it could be that while + // we'd prefer to widen the result type, the input operands have been split. + // In this case, we also need to split the result of this node. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + SDValue SplitVSetCC = SplitVecOp_VSETCC(N); + SDValue Res = ModifyToType(SplitVSetCC, WidenVT); + return Res; + } + InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index db38b76..6303422 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TRI = STI.getRegisterInfo(); TLI = IS->TLI; TII = STI.getInstrInfo(); - ResourcesModel = TII->CreateTargetScheduleState(STI); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now // do not let it proceed.
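Both ExpandAtomic bodies above now defer to RTLIB::getATOMIC, which maps (atomic opcode, memory width) to a libcall instead of nesting switches. A standalone sketch of that table-driven shape, using the corresponding compiler-rt symbol names (the enum and function here are illustrative stand-ins, not the LLVM API):

    #include <cassert>
    #include <string>

    enum AtomicOp { OpSwap, OpAdd, OpSub };

    // libcall = f(opcode, width): one lookup replaces a switch per opcode
    // nested over a switch per width.
    static std::string getAtomicLibcall(AtomicOp Op, unsigned Bits) {
      static const char *Base[] = {"__sync_lock_test_and_set_",
                                   "__sync_fetch_and_add_",
                                   "__sync_fetch_and_sub_"};
      assert(Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64 ||
             Bits == 128);
      return std::string(Base[Op]) + std::to_string(Bits / 8);
    }

    int main() {
      assert(getAtomicLibcall(OpAdd, 32) == "__sync_fetch_and_add_4");
      assert(getAtomicLibcall(OpSwap, 128) == "__sync_lock_test_and_set_16");
    }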
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) { Queue.pop_back(); } - - -#ifdef NDEBUG -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} -#else -void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { - ResourcePriorityQueue q = *this; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(DAG); - } -} -#endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9466f4d..b52f648 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -196,6 +196,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. @@ -1446,13 +1462,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { // N2 to point at N1. static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { std::swap(N1, N2); - int NElts = M.size(); - for (int i = 0; i != NElts; ++i) { - if (M[i] >= NElts) - M[i] -= NElts; - else if (M[i] >= 0) - M[i] += NElts; - } + ShuffleVectorSDNode::commuteMask(M); } SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, @@ -1625,19 +1635,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { MVT VT = SV.getSimpleValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - - for (unsigned i = 0; i != NumElems; ++i) { - int Idx = SV.getMaskElt(i); - if (Idx >= 0) { - if (Idx < (int)NumElems) - Idx += NumElems; - else - Idx -= NumElems; - } - MaskVec.push_back(Idx); - } + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); @@ -2844,7 +2843,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } } - // Constant fold unary operations with a vector integer operand. + // Constant fold unary operations with a vector integer or float operand. if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { if (BV->isConstant()) { switch (Opcode) { @@ -2852,18 +2851,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FP_EXTEND: + case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { + // Let the above scalar folding handle the folding of each element. SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue OpN = BV->getOperand(i); - // Let the above scalar folding handle the conversion of each - // element. 
- OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), - OpN); + OpN = getNode(Opcode, DL, VT.getVectorElementType(), OpN); + if (OpN.getOpcode() != ISD::UNDEF && + OpN.getOpcode() != ISD::Constant && + OpN.getOpcode() != ISD::ConstantFP) + break; Ops.push_back(OpN); } - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } } } @@ -5418,17 +5424,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); - // Check to see if there is no change. - bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { - if (Ops[i] != N->getOperand(i)) { - AnyChange = true; - break; - } - } - - // No operands changed, just return the input node. - if (!AnyChange) return N; + // If no operands changed just return the input node. + if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; // See if the modified node already exists. void *InsertPos = nullptr; @@ -6673,8 +6671,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, - TLI->getDataLayout()); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + *TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 097b618..6c14e79 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1016,6 +1016,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, } } +/// getCopyFromRegs - If there was virtual register allocated for the value V +/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. +SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + SDValue res; + + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + Ty); + SDValue Chain = DAG.getEntryNode(); + res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, res); + } + + return res; +} + /// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -1026,15 +1044,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // If there's a virtual register allocated and initialized for this // value, use it. 
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); - if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - V->getType()); - SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); - resolveDanglingDebugInfo(V, N); - return N; + SDValue copyFromReg = getCopyFromRegs(V, V->getType()); + if (copyFromReg.getNode()) { + return copyFromReg; } // Otherwise create a new SDValue and remember it. @@ -1573,19 +1585,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = BrMBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - if (I.isUnconditional()) { // Update machine-CFG edges. BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. - if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None) + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1682,7 +1688,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); - const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); EVT VT = CmpOp.getValueType(); @@ -1705,16 +1711,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, if (CB.TrueBB != CB.FalseBB) addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == NextBlock) { + if (CB.TrueBB == NextBlock(SwitchBB)) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); @@ -1781,19 +1780,12 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, Sub.getValueType()), Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - if (JT.MBB != NextBlock) + // Avoid emitting unnecessary branches to the next block. 
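Several hunks in this file replace the repeated "advance the function iterator and compare against end()" idiom with a NextBlock(MBB) helper whose definition lies outside the hunks shown here. A standalone model of what such a helper presumably does (plain C++ with a list of ints standing in for the block list):

    #include <cassert>
    #include <iterator>
    #include <list>

    // Return the block laid out immediately after the given one, or nullptr
    // when the given block is the last in the function.
    static const int *nextBlock(const std::list<int> &Fn,
                                std::list<int>::const_iterator BB) {
      return ++BB == Fn.end() ? nullptr : &*BB;
    }

    int main() {
      std::list<int> Blocks = {10, 20, 30}; // stand-ins for basic blocks
      assert(*nextBlock(Blocks, Blocks.begin()) == 20);
      assert(nextBlock(Blocks, std::prev(Blocks.end())) == nullptr);
    }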
+ if (JT.MBB != NextBlock(SwitchBB)) BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); @@ -1922,13 +1914,6 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), B.Reg, Sub); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - MachineBasicBlock* MBB = B.Cases[0].ThisBB; addSuccessorWithWeight(SwitchBB, B.Default); @@ -1938,7 +1923,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - if (MBB != NextBlock) + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB)); @@ -1991,14 +1977,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = nullptr; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; - - if (NextMBB != NextBlock) + // Avoid emitting unnecessary branches to the next block. + if (NextMBB != NextBlock(SwitchBB)) BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); @@ -2027,13 +2007,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I, LandingPad); break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + break; } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. - CopyToExportRegsIfNeeded(&I); + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } // Update successor info addSuccessorWithWeight(InvokeMBB, Return); @@ -2128,11 +2115,10 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; + MachineBasicBlock *NextMBB = nullptr; MachineFunction::iterator BBI = CR.CaseBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + NextMBB = BBI; BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value @@ -2146,8 +2132,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, Case &Big = *(CR.Range.second-1); if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { - const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); - const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + const APInt& SmallValue = Small.Low->getValue(); + const APInt& BigValue = Big.Low->getValue(); // Check that there is only one bit different. 
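The test below merges two cases whose values differ in exactly one bit: clearing that bit lets a single comparison cover both. A runnable sketch of the identity (plain C++; the concrete values 4 and 6 are an assumed example):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Small = 4, Big = 6;                   // differ only in bit 1
      uint64_t Diff = Small ^ Big;
      assert(Diff != 0 && (Diff & (Diff - 1)) == 0); // exactly one bit set
      for (uint64_t X = 0; X < 16; ++X) {
        bool Either = (X == Small) || (X == Big);
        bool Masked = (X & ~Diff) == Small; // Small has the differing bit clear
        assert(Either == Masked);           // one masked compare covers both
      }
    }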
if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && @@ -2205,13 +2191,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Rearrange the case blocks so that the last one falls through if possible. Case &BackCase = *(CR.Range.second-1); - if (Size > 1 && - NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { - // The last case block won't fall through into 'NextBlock' if we emit the + if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) { + // The last case block won't fall through into 'NextMBB' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) - if (I->BB == NextBlock) { + if (I->BB == NextMBB) { std::swap(*I, BackCase); break; } @@ -2287,8 +2272,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = FrontCase.Low->getValue(); + const APInt &Last = BackCase.High->getValue(); APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) @@ -2338,8 +2323,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, std::vector<MachineBasicBlock*> DestBBs; APInt TEI = First; for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt &High = cast<ConstantInt>(I->High)->getValue(); + const APInt &Low = I->Low->getValue(); + const APInt &High = I->High->getValue(); if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); @@ -2352,26 +2337,19 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, // Calculate weight for each unique destination in CR. DenseMap<MachineBasicBlock*, uint32_t> DestWeights; - if (FuncInfo.BPI) - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(I->BB); - if (Itr != DestWeights.end()) - Itr->second += I->ExtraWeight; - else - DestWeights[I->BB] = I->ExtraWeight; - } + if (FuncInfo.BPI) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) + DestWeights[I->BB] += I->ExtraWeight; + } // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) { - if (!SuccsHandled[(*I)->getNumber()]) { - SuccsHandled[(*I)->getNumber()] = true; - DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = - DestWeights.find(*I); - addSuccessorWithWeight(JumpTableBB, *I, - Itr != DestWeights.end() ? Itr->second : 0); + for (MachineBasicBlock *DestBB : DestBBs) { + if (!SuccsHandled[DestBB->getNumber()]) { + SuccsHandled[DestBB->getNumber()] = true; + auto I = DestWeights.find(DestBB); + addSuccessorWithWeight(JumpTableBB, DestBB, + I != DestWeights.end() ? I->second : 0); } } @@ -2403,8 +2381,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Size is the number of Cases represented by this range. 
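The DestWeights hunk above collapses a find-or-insert sequence into DestWeights[I->BB] += I->ExtraWeight, relying on operator[] value-initializing absent entries to zero. A minimal demonstration (std::map shown for portability; DenseMap behaves the same way for this purpose):

    #include <cassert>
    #include <cstdint>
    #include <map>

    int main() {
      std::map<int, uint32_t> Weights;
      int BB = 7;                 // stand-in for a MachineBasicBlock key
      Weights[BB] += 10;          // inserts {7, 0}, then adds
      Weights[BB] += 5;
      assert(Weights[BB] == 15);
    }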
unsigned Size = CR.Range.second - CR.Range.first; - const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = FrontCase.Low->getValue(); + const APInt &Last = BackCase.High->getValue(); double FMetric = 0; CaseItr Pivot = CR.Range.first + Size/2; @@ -2423,8 +2401,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; J!=E; ++I, ++J) { - const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); + const APInt &LEnd = I->High->getValue(); + const APInt &RBegin = J->Low->getValue(); APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); @@ -2479,7 +2457,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - const Constant *C = Pivot->Low; + const ConstantInt *C = Pivot->Low; MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; // We know that we branch to the LHS if the Value being switched on is @@ -2489,8 +2467,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, // Pivot's Value, then we can branch directly to the LHS's Target, // rather than creating a leaf node for it. if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && - cast<ConstantInt>(C)->getValue() == - (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) { + C->getValue() == (CR.GE->getValue() + 1LL)) { TrueBB = LHSR.first->BB; } else { TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); @@ -2506,8 +2483,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, // is CR.LT - 1, then we can branch directly to the target block for // the current Case Value, rather than emitting a RHS leaf node for it. if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast<ConstantInt>(RHSR.first->Low)->getValue() == - (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) { + RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) { FalseBB = RHSR.first->BB; } else { FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); @@ -2571,8 +2547,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, << "Total number of comparisons: " << numCmps << '\n'); // Compute span of values. 
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt& minValue = FrontCase.Low->getValue(); + const APInt& maxValue = BackCase.High->getValue(); APInt cmpRange = maxValue - minValue; DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' @@ -2612,8 +2588,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, count++; } - const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue(); - const APInt& highValue = cast<ConstantInt>(I->High)->getValue(); + const APInt& lowValue = I->Low->getValue(); + const APInt& highValue = I->High->getValue(); uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); @@ -2663,45 +2639,42 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, return true; } -/// Clusterify - Transform simple list of Cases into list of CaseRange's -void SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { +void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) { BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases. - for (SwitchInst::ConstCaseIt i : SI.cases()) { - const BasicBlock *SuccBB = i.getCaseSuccessor(); - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - - uint32_t ExtraWeight = - BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; - - Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), - SMBB, ExtraWeight)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = std::next(Cases.begin()); - J != Cases.end(); ) { - const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); - const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - I->ExtraWeight += J->ExtraWeight; - J = Cases.erase(J); - } else { - I = J++; - } + + // Extract cases from the switch and sort them. + typedef std::pair<const ConstantInt*, unsigned> CasePair; + std::vector<CasePair> Sorted; + Sorted.reserve(SI->getNumCases()); + for (auto I : SI->cases()) + Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex())); + std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) { + return a.first->getValue().slt(b.first->getValue()); + }); + + // Merge adjacent cases with the same destination, build Cases vector. + assert(Cases.empty() && "Cases should be empty before Clusterify;"); + Cases.reserve(SI->getNumCases()); + MachineBasicBlock *PreviousSucc = nullptr; + for (CasePair &CP : Sorted) { + const ConstantInt *CaseVal = CP.first; + unsigned SuccIndex = CP.second; + MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)]; + uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0; + + if (PreviousSucc == Succ && + (CaseVal->getValue() - Cases.back().High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. 
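The merge step continues just below; the whole of the new Clusterify reduces to "sort by case value, then extend the previous cluster when the value is adjacent and the successor matches". A standalone model with simplified stand-in types (ints for case values and successor ids):

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    struct Cluster { int Low, High, Succ; };

    // Sort (value, successor) pairs, then merge runs of consecutive values
    // sharing a successor into [Low, High] clusters.
    static std::vector<Cluster>
    clusterify(std::vector<std::pair<int, int>> Cases) {
      std::sort(Cases.begin(), Cases.end());
      std::vector<Cluster> Out;
      for (auto &C : Cases) {
        if (!Out.empty() && Out.back().Succ == C.second &&
            C.first - Out.back().High == 1)
          Out.back().High = C.first; // neighbour, same successor: extend
        else
          Out.push_back({C.first, C.first, C.second});
      }
      return Out;
    }

    int main() {
      // case 1,2,3 -> BB0; case 5 -> BB1; case 6 -> BB0
      auto Out = clusterify({{3,0},{1,0},{5,1},{2,0},{6,0}});
      assert(Out.size() == 3);
      assert(Out[0].Low == 1 && Out[0].High == 3 && Out[0].Succ == 0);
      assert(Out[1].Low == 5 && Out[1].High == 5 && Out[1].Succ == 1);
      assert(Out[2].Low == 6 && Out[2].High == 6 && Out[2].Succ == 0);
    }

Sorting first is what makes the single pass valid: after the sort, every mergeable run is contiguous.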
+ Cases.back().High = CaseVal; + Cases.back().ExtraWeight += Weight; + } else { + Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight)); } + PreviousSucc = Succ; + } + DEBUG({ size_t numCmps = 0; for (auto &I : Cases) @@ -2729,16 +2702,10 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = nullptr; - if (SwitchMBB + 1 != FuncInfo.MF->end()) - NextBlock = SwitchMBB + 1; - - // Create a vector of Cases, sorted so that we can efficiently create a binary // search tree from them. CaseVector Cases; - Clusterify(Cases, SI); + Clusterify(Cases, &SI); // Get the default destination MBB. MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2775,7 +2742,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchMBB->addSuccessor(Default); // If this is not a fall-through branch, emit the branch. - if (Default != NextBlock) { + if (Default != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Default))); } @@ -3429,30 +3396,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); + MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); + unsigned PtrSize = PtrTy.getSizeInBits(); + APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); // If this is a constant subscript, handle it quickly. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->isZero()) continue; - uint64_t Offs = - DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - SDValue OffsVal; - EVT PTy = TLI.getPointerTy(AS); - unsigned PtrBits = PTy.getSizeInBits(); - if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, - DAG.getConstant(Offs, MVT::i64)); - else - OffsVal = DAG.getConstant(Offs, PTy); - - N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, - OffsVal); + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + SDValue OffsVal = DAG.getConstant(Offs, PtrTy); + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); continue; } // N = N + Idx * ElementSize; - APInt ElementSize = - APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3988,6 +3946,93 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { MVT::f32); } +static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, + SelectionDAG &DAG) { + // IntegerPartOfX = ((int32_t)(t0); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = t0 - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode( + ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy())); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * 
x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. 
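For intuition, the scheme getLimitedPrecisionExp2 encodes as DAG nodes looks like this in plain C++ (6-bit variant; valid away from overflow and denormal edges, just like the DAG expansion): truncate to get the integer part, evaluate the quoted minimax polynomial on the fraction, then add the integer part straight into the IEEE-754 exponent field, mirroring the FP_TO_SINT / FSUB / SHL-by-23 / integer-ADD sequence built above.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

static float exp2Limited(float x) {
  int32_t IntPart = (int32_t)x;            // FP_TO_SINT
  float Frac = x - (float)IntPart;         // FSUB; Frac is in (-1, 1)

  // 6-bit polynomial from the patch: 0x3f7f5e7e, 0x3f3c50c8, 0x3e814304.
  float TwoToFrac =
      0.997535578f + (0.735607626f + 0.252464424f * Frac) * Frac;

  uint32_t Bits;
  std::memcpy(&Bits, &TwoToFrac, sizeof(Bits));
  Bits += (uint32_t)IntPart << 23;         // add the exponent in integer domain
  std::memcpy(&TwoToFrac, &Bits, sizeof(Bits));
  return TwoToFrac;
}

int main() {
  std::printf("%f vs %f\n", exp2Limited(3.3f), std::exp2(3.3f));
}

The expandExp lowering, whose doc comment appears just above, reduces exp(x) to this helper.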
static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, @@ -3999,92 +4044,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // final result: // // #define LOG2OFe 1.4426950f - // IntegerPartOfX = ((int32_t)(X * LOG2OFe)); + // t0 = Op * LOG2OFe SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFracPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // 0.000107046256 error, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. 
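After this refactor, all expandExp does before delegating is the range reduction exp(x) = 2^(x · log2 e); the hex literal it feeds getF32Constant is simply log2 e in IEEE-754 bits, which a couple of lines can confirm:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t Bits = 0x3fb8aa3b;      // the getF32Constant(DAG, 0x3fb8aa3b) above
  float Log2e;
  std::memcpy(&Log2e, &Bits, sizeof(Log2e));
  std::printf("%.7f\n", Log2e);    // prints ~1.4426950, i.e. log2(e)
}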
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -4375,91 +4338,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && - LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // TwoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = 
DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - // Add the exponent into the result in integer domain. - SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, - TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); - } + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) + return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); @@ -4483,90 +4363,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, // final result: // // #define LOG2OF10 3.3219281f - // IntegerPartOfX = (int32_t)(x * LOG2OF10); + // t0 = Op * LOG2OF10; SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); - - // FractionalPartOfX = x - (float)IntegerPartOfX; - SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); - SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - - // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, TLI.getPointerTy())); - - SDValue TwoToFractionalPartOfX; - if (LimitFloatPrecision <= 6) { - // For floating-point precision of 6: - // - // twoToFractionalPartOfX = - // 0.997535578f + - // (0.735607626f + 0.252464424f * x) * x; - // - // error 0.0144103317, which is 6 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3e814304)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f3c50c8)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - } else if (LimitFloatPrecision <= 12) { - // For floating-point precision of 12: - // - // TwoToFractionalPartOfX = - // 0.999892986f + - // (0.696457318f + - // (0.224338339f + 0.792043434e-1f * x) * x) * x; - // - // error 0.000107046256, which is 13 to 14 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3da235e3)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3e65b8f3)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f324b07)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - } else { // LimitFloatPrecision <= 18 - // For floating-point precision of 18: - // - // TwoToFractionalPartOfX = - // 0.999999982f + - // (0.693148872f + - // (0.240227044f + - // (0.554906021e-1f + - // (0.961591928e-2f + - // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; - // error 2.47208000*10^(-7), which is better than 18 bits - SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, - getF32Constant(DAG, 0x3924b03e)); - SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3ab24b87)); - SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3c1d8c17)); - SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3d634a1d)); - SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, - 
getF32Constant(DAG, 0x3e75fe14)); - SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, - getF32Constant(DAG, 0x3f317234)); - SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - } - - SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::ADD, dl, MVT::i32, - t13, IntegerPartOfX)); + return getLimitedPrecisionExp2(t0, dl, DAG); } // No special expansion. @@ -5114,34 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } - case Intrinsic::x86_avx_vinsertf128_pd_256: - case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: - case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * - ElVT.getVectorNumElements(); - Res = - DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } - case Intrinsic::x86_avx_vextractf128_pd_256: - case Intrinsic::x86_avx_vextractf128_ps_256: - case Intrinsic::x86_avx_vextractf128_si_256: - case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); - uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * - DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - setValue(&I, Res); - return nullptr; - } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5539,7 +5311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); + GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -5618,45 +5390,47 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameallocate: { + case Intrinsic::frameescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Do the allocation and map it as a normal value. - // FIXME: Maybe we should add this to the alloca map so that we don't have - // to register allocate it? - uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); - int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); - MVT PtrVT = TLI.getPointerTy(0); - SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); - setValue(&I, FIVal); - - // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is - // the same on all targets. - MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) - .addSym(FrameAllocSym) - .addFrameIndex(Alloc); + // Directly emit some FRAME_ALLOC machine instrs. 
Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + AllocaInst *Slot = + cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts()); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(), + Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } return nullptr; } case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp) + // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); MVT PtrVT = TLI.getPointerTy(0); // Get the symbol that defines the frame offset. - Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(), + IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. StringRef Name = FrameAllocSym->getName(); - assert(Name.size() == strlen(Name.data()) && "not null terminated"); + assert(Name.data()[Name.size()] == '\0' && "not null terminated"); SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); SDValue OffsetVal = DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); @@ -5672,6 +5446,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_begincatch: case Intrinsic::eh_endcatch: llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + case Intrinsic::eh_unwindhelp: { + AllocaInst *Slot = + cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only use static allocas with llvm.eh.unwindhelp"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + // TODO: Save this in the not-yet-existant WinEHFuncInfo struct. + (void)FI; + return nullptr; + } } } @@ -5805,9 +5589,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (const Constant *LoadCst = - ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), - Builder.DL)) + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), *Builder.DL)) return Builder.getValue(LoadCst); } @@ -6748,10 +6531,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Memory output, or 'other' output (e.g. 'X' constraint). assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this output. 
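The lines that follow pack a target-independent memory-constraint ID into the INLINEASM operand flag word, so later passes no longer have to rediscover it from the single letter 'm'. A sketch of that packing with assumed field positions (kind in the low 3 bits, operand count from bit 3, constraint ID from bit 16; treat these as assumptions, the authoritative values live in llvm/IR/InlineAsm.h):

#include <cstdio>

enum { Kind_Mem = 6, Constraints_ShiftAmount = 16 };  // assumed values

static unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
  return Kind | (NumOps << 3);
}

static unsigned getFlagWordForMem(unsigned Flag, unsigned ConstraintID) {
  return Flag | (ConstraintID << Constraints_ShiftAmount);
}

int main() {
  unsigned F = getFlagWordForMem(getFlagWord(Kind_Mem, 1), /*ConstraintID=*/5);
  std::printf("INLINEASM mem operand flags = 0x%x\n", F);  // 0x5000e
}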
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6855,6 +6643,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, @@ -6894,10 +6683,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32)); AsmNodeOperands.push_back(InOperandVal); break; } @@ -7901,8 +7695,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. + // Check PHI nodes in successors that expect a value to be available from this + // block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; @@ -7989,3 +7783,10 @@ AddSuccessorMBB(const BasicBlock *BB, SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); return SuccMBB; } + +MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator I = MBB; + if (++I == FuncInfo.MF->end()) + return nullptr; + return I; +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ad7411f..30240d8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -137,19 +137,19 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. 
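The Case struct defined next stores ConstantInt endpoints directly, which removes a cast<ConstantInt> at every use; its size() is the inclusive-range count High - Low + 1, computed in APInt so it stays exact at the switch operand's own bit width. The same count with fixed-width integers:

#include <cstdint>
#include <cstdio>

// Number of values covered by the inclusive range [Lo, Hi].
static uint64_t caseRangeSize(int64_t Lo, int64_t Hi) {
  return (uint64_t)(Hi - Lo) + 1;   // High - Low + 1, as in Case::size()
}

int main() {
  std::printf("%llu\n", (unsigned long long)caseRangeSize(-2, 5));  // 8
}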
struct Case { - const Constant *Low; - const Constant *High; + const ConstantInt *Low; + const ConstantInt *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } - Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, + Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } APInt size() const { - const APInt &rHigh = cast<ConstantInt>(High)->getValue(); - const APInt &rLow = cast<ConstantInt>(Low)->getValue(); + const APInt &rHigh = High->getValue(); + const APInt &rLow = Low->getValue(); return (rHigh - rLow + 1ULL); } }; @@ -173,7 +173,7 @@ private: /// CaseRec - A struct with ctor used in lowering switches to a binary tree /// of conditional branches. struct CaseRec { - CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge, CaseRange r) : CaseBB(bb), LT(lt), GE(ge), Range(r) {} @@ -181,8 +181,8 @@ private: MachineBasicBlock *CaseBB; /// LT, GE - If nonzero, we know the current case value must be less-than or /// greater-than-or-equal-to these Constants. - const Constant *LT; - const Constant *GE; + const ConstantInt *LT; + const ConstantInt *GE; /// Range - A pair of iterators representing the range of case values to be /// processed at this point in the binary search tree. CaseRange Range; @@ -190,24 +190,15 @@ private: typedef std::vector<CaseRec> CaseRecVector; - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High)); - const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); - const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; - struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; } }; - void Clusterify(CaseVector &Cases, const SwitchInst &SI); + /// Populate Cases with the cases in SI, clustering adjacent cases with the + /// same destination together. + void Clusterify(CaseVector &Cases, const SwitchInst *SI); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -606,6 +597,10 @@ public: void visit(unsigned Opcode, const User &I); + /// getCopyFromRegs - If there was virtual register allocated for the value V + /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. + SDValue getCopyFromRegs(const Value *V, Type *Ty); + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); @@ -622,8 +617,7 @@ public: void removeValue(const Value *V) { // This is to support hack in lowerCallFromStatepoint // Should be removed when hack is resolved - if (NodeMap.count(V)) - NodeMap.erase(V); + NodeMap.erase(V); } void setUnusedArgValue(const Value *V, SDValue NewN) { @@ -662,7 +656,9 @@ public: void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); // This function is responsible for the whole statepoint lowering process. 
- void LowerStatepoint(ImmutableStatepoint Statepoint); + // It uniformly handles invoke and call statepoints. + void LowerStatepoint(ImmutableStatepoint Statepoint, + MachineBasicBlock *LandingPad = nullptr); private: std::pair<SDValue, SDValue> lowerInvokable( TargetLowering::CallLoweringInfo &CLI, @@ -830,6 +826,9 @@ private: bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, int64_t Offset, bool IsIndirect, const SDValue &N); + + /// Return the next block after MBB, or nullptr if there is none. + MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 17eff94..5898da4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5e867cf..4d2af3f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -168,14 +168,13 @@ static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " "instruction selector")); -static cl::opt<bool> -EnableFastISelAbort("fast-isel-abort", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction selection " - "fails to lower an instruction")); -static cl::opt<bool> -EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction selection " - "fails to lower a formal argument")); +static cl::opt<int> EnableFastISelAbort( + "fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction: 0 disable the abort, 1 will " + "abort but for args, calls and terminators, 2 will also " + "abort for argument lowering, and 3 will never fallback " + "to SelectionDAG.")); static cl::opt<bool> UseMBPI("use-mbpi", @@ -293,7 +292,8 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); - if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI->getSchedulingPreference() == Sched::RegPressure) @@ -416,7 +416,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && - "-fast-isel-abort requires -fast-isel"); + "-fast-isel-abort > 0 requires -fast-isel"); const Function &Fn = *mf.getFunction(); MF = &mf; @@ -595,9 +595,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator 
End, bool &HadTailCall) { - // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. Terminators - // are handled below. + // Lower the instructions. If a call is emitted as a tail call, cease emitting + // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) SDB->visit(*I); @@ -1182,8 +1181,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; - if (EnableFastISelAbortArgs) - llvm_unreachable("FastISel didn't lower all arguments"); + if (EnableFastISelAbort > 1) + report_fatal_error("FastISel didn't lower all arguments"); // Use SelectionDAG argument lowering LowerArguments(Fn); @@ -1252,6 +1251,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { dbgs() << "FastISel missed call: "; Inst->dump(); } + if (EnableFastISelAbort > 2) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; @@ -1279,24 +1282,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } - if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { - // Don't abort, and use a different message for terminator misses. - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + bool ShouldAbort = EnableFastISelAbort; + if (EnableFastISelVerbose || EnableFastISelAbort) { + if (isa<TerminatorInst>(Inst)) { + // Use a different message for terminator misses. dbgs() << "FastISel missed terminator: "; - Inst->dump(); - } - } else { - NumFastIselFailures += NumFastIselRemaining; - if (EnableFastISelVerbose || EnableFastISelAbort) { + // Don't abort unless for terminator unless the level is really high + ShouldAbort = (EnableFastISelAbort > 2); + } else { dbgs() << "FastISel miss: "; - Inst->dump(); } - if (EnableFastISelAbort) - // The "fast" selector couldn't handle something and bailed. - // For the purpose of debugging, just abort. - llvm_unreachable("FastISel didn't select the entire block"); + Inst->dump(); } + if (ShouldAbort) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); + + NumFastIselFailures += NumFastIselRemaining; break; } @@ -1775,9 +1778,23 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } else { assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && "Memory operand with multiple values?"); + + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + // We need the constraint ID from the operand this is tied to. + unsigned CurOp = InlineAsm::Op_FirstOperand; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + for (; TiedToOperand; --TiedToOperand) { + CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + } + } + // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + if (SelectInlineAsmMemoryOperand(InOps[i+1], + InlineAsm::getMemoryConstraintID(Flags), + SelOps)) report_fatal_error("Could not match memory address. 
Inline asm"
                       " failure!");
@@ -1933,7 +1950,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
   std::vector<SDValue> Ops(N->op_begin(), N->op_end());
   SelectInlineAsmMemoryOperands(Ops);
 
-  EVT VTs[] = { MVT::Other, MVT::Glue };
+  const EVT VTs[] = {MVT::Other, MVT::Glue};
   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops);
   New->setNodeId(-1);
   return New.getNode();
 }
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 1271f6b..3cc7a98 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -224,6 +224,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
 /// call node. Also update NodeMap so that getValue(statepoint) will
 /// reference lowered call result
 static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+                                       MachineBasicBlock *LandingPad,
                                        SelectionDAGBuilder &Builder) {
 
   ImmutableCallSite CS(StatepointSite.getCallSite());
@@ -245,15 +246,29 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
   Tmp->setTailCall(CS.isTailCall());
   Tmp->setCallingConv(CS.getCallingConv());
   Tmp->setAttributes(CS.getAttributes());
-  Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+  Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false, LandingPad);
 
   // Handle the return value of the call iff any.
   const bool HasDef = !Tmp->getType()->isVoidTy();
   if (HasDef) {
-    // The value of the statepoint itself will be the value of call itself.
-    // We'll replace the actually call node shortly. gc_result will grab
-    // this value.
-    Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+    if (CS.isInvoke()) {
+      // The result value will be used in a different basic block for
+      // invokes, so we need to export it now. But the statepoint call has a
+      // different type than the actual call, so the standard exporting
+      // mechanism would create a register of the wrong type. Instead, create
+      // a register of the correct type and save the value into it manually.
+      // TODO: To eliminate this problem we can remove gc.result intrinsics
+      // completely and make the statepoint call return a tuple.
+      unsigned reg = Builder.FuncInfo.CreateRegs(Tmp->getType());
+      Builder.CopyValueToVirtualRegister(Tmp, reg);
+      Builder.FuncInfo.ValueMap[CS.getInstruction()] = reg;
+    }
+    else {
+      // The value of the statepoint itself will be the value of the call
+      // itself. We'll replace the actual call node shortly. gc_result will
+      // grab this value.
+      Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+    }
   } else {
     // The token value is never used from here on, just generate a poison value
     Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
@@ -267,6 +282,15 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
 
   // Search for the call node
   // The following code is essentially reverse engineering X86's
   // LowerCallTo.
+  // We are expecting the DAG to have the following form:
+  // ch = eh_label (only in case of invoke statepoint)
+  // ch, glue = callseq_start ch
+  // ch, glue = X86::Call ch, glue
+  // ch, glue = callseq_end ch, glue
+  // ch = eh_label ch (only in case of invoke statepoint)
+  //
+  // The DAG root will be either the last eh_label or the callseq_end.
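Given that expected shape, the search that follows walks backward from the DAG root along the chain operand (operand 0) until it reaches CALLSEQ_END, guarded by a sanity counter instead of glue links. The same walk on a toy node type (illustrative, not the SelectionDAG API):

#include <cassert>

struct Node { int Opcode; Node *Chain; };   // Chain models getOperand(0)
enum { CALLSEQ_END = 1, EH_LABEL = 2 };

static Node *findCallSeqEnd(Node *Root) {
  Node *N = Root;
  int Sanity = 0;
  while (N->Opcode != CALLSEQ_END) {
    assert(N->Chain && "walked off the start of the chain");
    N = N->Chain;
    assert(++Sanity < 20 && "should have found call end already");
  }
  return N;
}

int main() {
  Node End = {CALLSEQ_END, nullptr};
  Node Label = {EH_LABEL, &End};            // invoke case: eh_label on top
  return findCallSeqEnd(&Label) == &End ? 0 : 1;
}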
+
   SDNode *CallNode = nullptr;
 
   // We just emitted a call, so it should be last thing generated
@@ -276,8 +300,11 @@
   SDNode *CallEnd = Chain.getNode();
   int Sanity = 0;
   while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
-    CallEnd = CallEnd->getGluedNode();
-    assert(CallEnd && "Can not find call node");
+    assert(CallEnd->getNumOperands() >= 1 &&
+           CallEnd->getOperand(0).getValueType() == MVT::Other);
+
+    CallEnd = CallEnd->getOperand(0).getNode();
+    assert(Sanity < 20 && "should have found call end already");
     Sanity++;
   }
@@ -506,7 +533,9 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
   LowerStatepoint(ImmutableStatepoint(&CI));
 }
 
-void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+                                     MachineBasicBlock *LandingPad/*=nullptr*/) {
   // The basic scheme here is that information about both the original call and
   // the safepoint is encoded in the CallInst. We create a temporary call and
   // lower it, then reverse engineer the calling sequence.
@@ -542,13 +571,12 @@
   }
 #endif
 
-  // Lower statepoint vmstate and gcstate arguments
   SmallVector<SDValue, 10> LoweredArgs;
   lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
 
   // Get call node, we will replace it later with statepoint
-  SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+  SDNode *CallNode = lowerCallFromStatepoint(ISP, LandingPad, *this);
 
   // Construct the actual STATEPOINT node with all the appropriate arguments
   // and return values.
@@ -634,7 +662,24 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
 
   assert(isStatepoint(I) &&
          "first argument must be a statepoint token");
 
-  setValue(&CI, getValue(I));
+  if (isa<InvokeInst>(I)) {
+    // For invokes we should have stored the call result in a virtual
+    // register. We cannot use the default getValue() mechanism to copy the
+    // value from this register, because the statepoint and the actual call
+    // have different return types, so getValue() would build a CopyFromReg
+    // of the wrong type, which is always i32 in our case.
+ PointerType *CalleeType = cast<PointerType>( + ImmutableStatepoint(I).actualCallee()->getType()); + Type *RetTy = cast<FunctionType>( + CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } + else { + setValue(&CI, getValue(I)); + } } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0a3c926..ddbf0b2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -96,18 +96,21 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; + Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) - .setSExtResult(isSigned).setZExtResult(!isSigned); + .setSExtResult(signExtend).setZExtResult(!signExtend); return LowerCallTo(CLI); } diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index f6393a5..66a6a3c 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -53,10 +53,10 @@ private: Type *GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, - Value *BasePtr, int Idx1, + Type *Ty, Value *BasePtr, int Idx1, const char *Name); static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, - Value *BasePtr, int Idx1, int Idx2, + Type *Ty, Value *BasePtr, int Idx1, int Idx2, const char *Name); }; } @@ -343,13 +343,14 @@ void ShadowStackGCLowering::CollectRoots(Function &F) { } GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, - int Idx, int Idx2, - const char *Name) { + IRBuilder<> &B, Type *Ty, + Value *BasePtr, int Idx, + int Idx2, + const char *Name) { Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx), ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -357,11 +358,11 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, } GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Value *BasePtr, + IRBuilder<> &B, Type *Ty, Value *BasePtr, int Idx, const char *Name) { Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx)}; - Value *Val = B.CreateGEP(BasePtr, Indices, Name); + Value *Val = 
B.CreateGEP(Ty, BasePtr, Indices, Name); assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); @@ -402,14 +403,15 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = - CreateGEP(Context, AtEntry, StackEntry, 0, 1, "gc_frame.map"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 1, "gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); + Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; @@ -426,10 +428,10 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. - Instruction *EntryNextPtr = - CreateGEP(Context, AtEntry, StackEntry, 0, 0, "gc_frame.next"); - Instruction *NewHeadVal = - CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead"); + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 0, "gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, "gc_newhead"); AtEntry.CreateStore(CurrentHead, EntryNextPtr); AtEntry.CreateStore(NewHeadVal, Head); @@ -439,7 +441,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. Instruction *EntryNextPtr2 = - CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); + CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, + "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); } diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 35e4292..2335a88 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -128,7 +128,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); Value *Idxs[2] = { Zero, One }; - Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); + Value *CallSite = + Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site"); // Insert a store of the call-site number ConstantInt *CallSiteNoC = diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index d46621d..025ae70 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() { void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // Number indexes with half the default spacing so we can catch up quickly. 
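The CreateGEP calls migrated in the ShadowStackGCLowering and SjLjEHPrepare hunks above use the explicitly-typed builder overload: the pointee type becomes an argument instead of being read off the pointer operand's type. A minimal sketch against the LLVM C++ API of this era (module and function scaffolding here is illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gep-demo", Ctx);
  IRBuilder<> B(Ctx);

  Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                 Function::ExternalLinkage, "f", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  Type *ArrTy = ArrayType::get(B.getInt32Ty(), 4);
  Value *Slot = B.CreateAlloca(ArrTy);
  Value *Idxs[] = {B.getInt32(0), B.getInt32(2)};
  // The element type is now an explicit argument:
  B.CreateGEP(ArrTy, Slot, Idxs, "elt");
  B.CreateRetVoid();
  M.dump();
}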
const unsigned Space = SlotIndex::InstrDist/2; - assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); + static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM"); IndexList::iterator startItr = std::prev(curItr); unsigned index = startItr->getIndex(); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index faf94b6..7572803 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -364,7 +364,7 @@ void StackColoring::calculateLocalLiveness() { } } - BBSet = NextBBSet; + BBSet = std::move(NextBBSet); }// while changed. } diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index 767f43a..d88be57 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -14,24 +14,24 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/StackMapLivenessAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "stackmaps" -namespace llvm { -cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness", - cl::Hidden, cl::init(true), - cl::desc("Enable PatchPoint Liveness Analysis Pass")); -} +static cl::opt<bool> EnablePatchPointLiveness( + "enable-patchpoint-liveness", cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped"); @@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited"); STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap"); STATISTIC(NumStackMaps, "Number of StackMaps visited"); +namespace { +/// \brief This pass calculates the liveness information for each basic block in +/// a function and attaches the register live-out information to a patchpoint +/// intrinsic if present. +/// +/// This pass can be disabled via the -enable-patchpoint-liveness=false flag. +/// The pass skips functions that don't have any patchpoint intrinsics. The +/// information provided by this pass is optional and not required by the +/// aformentioned intrinsic to function. +class StackMapLiveness : public MachineFunctionPass { + MachineFunction *MF; + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; + +public: + static char ID; + + /// \brief Default construct and initialize the pass. + StackMapLiveness(); + + /// \brief Tell the pass manager which passes we depend on and what + /// information we preserve. + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// \brief Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Performs the actual liveness calculation for the function. + bool calculateLiveness(); + + /// \brief Add the current register live set to the instruction. + void addLiveOutSetToMI(MachineInstr &MI); + + /// \brief Create a register mask and initialize it with the registers from + /// the register live set. 
+ uint32_t *createRegisterMask() const; +}; +} // namespace + char StackMapLiveness::ID = 0; char &llvm::StackMapLivenessID = StackMapLiveness::ID; INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness", @@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { } /// Calculate the liveness information for the given machine function. -bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { +bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " - << _MF.getName() << " **********\n"); - MF = &_MF; - TRI = MF->getSubtarget().getRegisterInfo(); + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() + << " **********\n"); + this->MF = &MF; + TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. - if (!MF->getFrameInfo()->hasPatchPoint()) { + if (!MF.getFrameInfo()->hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 5d46419..aa18dea 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -19,8 +19,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -76,10 +74,21 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +/// Go up the super-register chain until we hit a valid dwarf register number. +static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNo = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned) RegNo; +} + MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const { + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); if (MOI->isImm()) { switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); @@ -89,7 +98,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, Size /= 8; unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::IndirectMemRefOp: { @@ -97,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(Size > 0 && "Need a valid size for indirect memory locations."); unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm)); break; } case StackMaps::ConstantOp: { @@ -122,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); - const TargetRegisterClass *RC = - 
AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass( - MOI->getReg()); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); + + unsigned Offset = 0; + unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg()); + if (SubRegIdx) + Offset = TRI->getSubRegIdxOffset(SubRegIdx); + Locs.push_back( - Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + Location(Location::Register, RC->getSize(), RegNo, Offset)); return ++MOI; } @@ -137,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, return ++MOI; } -/// Go up the super-register chain until we hit a valid dwarf register number. -static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { - int RegNo = TRI->getDwarfRegNum(Reg, false); - for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) - RegNo = TRI->getDwarfRegNum(*SR, false); +void StackMaps::print(raw_ostream &OS) { + const TargetRegisterInfo *TRI = + AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr; + OS << WSMP << "callsites:\n"; + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; - assert(RegNo >= 0 && "Invalid Dwarf register number."); - return (unsigned) RegNo; + OS << WSMP << "callsite " << CSI.ID << "\n"; + OS << WSMP << " has " << CSLocs.size() << " locations\n"; + + unsigned OperIdx = 0; + for (const auto &Loc : CSLocs) { + OS << WSMP << " Loc " << OperIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + OS << "<Unprocessed operand>"; + break; + case Location::Register: + OS << "Register "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + break; + case Location::Direct: + OS << "Direct "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + if (Loc.Offset) + OS << " + " << Loc.Offset; + break; + case Location::Indirect: + OS << "Indirect "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + OS << "+" << Loc.Offset; + break; + case Location::Constant: + OS << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + OS << "Constant Index " << Loc.Offset; + break; + } + OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size + << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OperIdx++; + } + + OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n"; + + OperIdx = 0; + for (const auto &LO : LiveOuts) { + OS << WSMP << " LO " << OperIdx << ": "; + if (TRI) + OS << TRI->getName(LO.Reg); + else + OS << LO.Reg; + OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte " + << LO.Size << "]\n"; + OperIdx++; + } + } } /// Create a live-out register record for the given register Reg. @@ -160,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { StackMaps::LiveOutVec StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { assert(Mask && "No register mask specified"); - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); LiveOutVec LiveOuts; // Create a LiveOutReg for each bit that is set in the register mask. 
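parseOperand above now resolves DWARF register numbers while parsing; the helper it calls climbs the super-register chain until some covering register has a valid DWARF number (think AX falling back to EAX or RAX on x86). A table-driven sketch of that fallback:

#include <cassert>
#include <vector>

struct RegDesc {
  int DwarfNum;                 // < 0 means "no DWARF number of its own"
  std::vector<int> SuperRegs;   // ordered smallest to largest
};

static unsigned getDwarfRegNum(int Reg, const std::vector<RegDesc> &Regs) {
  int RegNo = Regs[Reg].DwarfNum;
  for (size_t I = 0; I < Regs[Reg].SuperRegs.size() && RegNo < 0; ++I)
    RegNo = Regs[Regs[Reg].SuperRegs[I]].DwarfNum;
  assert(RegNo >= 0 && "Invalid Dwarf register number.");
  return (unsigned)RegNo;
}

int main() {
  // Reg 0 ("AX") has no DWARF number; its super-register Reg 1 ("RAX") does.
  std::vector<RegDesc> Regs = {{-1, {1}}, {0, {}}};
  return getDwarfRegNum(0, Regs) == 0 ? 0 : 1;
}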
@@ -383,16 +460,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { /// 0x3, Indirect, [Reg + Offset] (spilled value) /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) -void StackMaps::emitCallsiteEntries(MCStreamer &OS, - const TargetRegisterInfo *TRI) { +void StackMaps::emitCallsiteEntries(MCStreamer &OS) { + DEBUG(print(dbgs())); // Callsite entries. - DEBUG(dbgs() << WSMP << "callsites:\n"); for (const auto &CSI : CSInfos) { const LocationVec &CSLocs = CSI.Locations; const LiveOutVec &LiveOuts = CSI.LiveOuts; - DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n"); - // Verify stack map entry. It's better to communicate a problem to the // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other @@ -413,83 +487,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS, // Reserved for flags. OS.EmitIntValue(0, 2); - - DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); - OS.EmitIntValue(CSLocs.size(), 2); - unsigned OperIdx = 0; for (const auto &Loc : CSLocs) { - unsigned RegNo = 0; - int Offset = Loc.Offset; - if(Loc.Reg) { - RegNo = getDwarfRegNum(Loc.Reg, TRI); - - // If this is a register location, put the subregister byte offset in - // the location offset. - if (Loc.LocType == Location::Register) { - assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg); - if (SubRegIdx) - Offset = TRI->getSubRegIdxOffset(SubRegIdx); - } - } - else { - assert(Loc.LocType != Location::Register && - "Missing location register"); - } - - DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": "; - switch (Loc.LocType) { - case Location::Unprocessed: - dbgs() << "<Unprocessed operand>"; - break; - case Location::Register: - dbgs() << "Register " << TRI->getName(Loc.Reg); - break; - case Location::Direct: - dbgs() << "Direct " << TRI->getName(Loc.Reg); - if (Loc.Offset) - dbgs() << " + " << Loc.Offset; - break; - case Location::Indirect: - dbgs() << "Indirect " << TRI->getName(Loc.Reg) - << " + " << Loc.Offset; - break; - case Location::Constant: - dbgs() << "Constant " << Loc.Offset; - break; - case Location::ConstantIndex: - dbgs() << "Constant Index " << Loc.Offset; - break; - } - dbgs() << " [encoding: .byte " << Loc.LocType - << ", .byte " << Loc.Size - << ", .short " << RegNo - << ", .int " << Offset << "]\n"; - ); - OS.EmitIntValue(Loc.LocType, 1); OS.EmitIntValue(Loc.Size, 1); - OS.EmitIntValue(RegNo, 2); - OS.EmitIntValue(Offset, 4); - OperIdx++; + OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(Loc.Offset, 4); } - DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() - << " live-out registers\n"); - // Num live-out registers and padding to align to 4 byte. OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); - OperIdx = 0; for (const auto &LO : LiveOuts) { - DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": " - << TRI->getName(LO.Reg) - << " [encoding: .short " << LO.RegNo - << ", .byte 0, .byte " << LO.Size << "]\n"); OS.EmitIntValue(LO.RegNo, 2); OS.EmitIntValue(0, 1); OS.EmitIntValue(LO.Size, 1); @@ -512,7 +523,6 @@ void StackMaps::serializeToStackMapSection() { MCContext &OutContext = AP.OutStreamer.getContext(); MCStreamer &OS = AP.OutStreamer; - const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); // Create the section. 
const MCSection *StackMapSection = @@ -527,7 +537,7 @@ void StackMaps::serializeToStackMapSection() { emitStackmapHeader(OS); emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); - emitCallsiteEntries(OS, TRI); + emitCallsiteEntries(OS); OS.AddBlankLine(); // Clean up. diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index cc72e5e..a5a175f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() { UsedColors.resize(LastFI); Assignments.resize(LastFI); + typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair; + SmallVector<Pair *, 16> Intervals; + Intervals.reserve(LS->getNumIntervals()); + for (auto &I : *LS) + Intervals.push_back(&I); + std::sort(Intervals.begin(), Intervals.end(), + [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); + // Gather all spill slots into a list. DEBUG(dbgs() << "Spill slot intervals:\n"); - for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { - LiveInterval &li = i->second; + for (auto *I : Intervals) { + LiveInterval &li = I->second; DEBUG(li.dump()); int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 2566c1f..38725b5 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -285,21 +285,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, - const TargetMachine *TM) const { + const MachineFunction &MF) const { if (!SubIdx) { Size = RC->getSize(); Offset = 0; return true; } - unsigned BitSize = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). 
if (BitSize % 8) return false; - int BitOffset = - TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + int BitOffset = TRI->getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; @@ -308,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getDataLayout()->isLittleEndian()) { + if (!MF.getTarget().getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -377,16 +376,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + ArrayRef<unsigned> Ops) const { return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } -static MachineInstr* foldPatchpoint(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex, +static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { unsigned StartIdx = 0; switch (MI->getOpcode()) { @@ -405,9 +401,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Return false if any operands requested for folding are not foldable (not // part of the stackmap's live values). - for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); - I != E; ++I) { - if (*I < StartIdx) + for (unsigned Op : Ops) { + if (Op < StartIdx) return nullptr; } @@ -427,8 +422,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, // Compute the spill slot size and offset. const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg()); - bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, - SpillOffset, &MF.getTarget()); + bool Valid = + TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF); if (!Valid) report_fatal_error("cannot spill patchpoint subregister operand"); MIB.addImm(StackMaps::IndirectMemRefOp); @@ -448,10 +443,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, /// operand folded, otherwise NULL is returned. The client is responsible for /// removing the old instruction and adding the new one in the instruction /// stream. -MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + int FI) const { unsigned Flags = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (MI->getOperand(Ops[i]).isDef()) @@ -517,10 +511,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. 
-MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 9048a44..58a6d52 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -664,6 +664,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } +RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +#define OP_TO_LIBCALL(Name, Enum) \ + case Name: \ + switch (VT.SimpleTy) { \ + default: \ + return UNKNOWN_LIBCALL; \ + case MVT::i8: \ + return Enum##_1; \ + case MVT::i16: \ + return Enum##_2; \ + case MVT::i32: \ + return Enum##_4; \ + case MVT::i64: \ + return Enum##_8; \ + case MVT::i128: \ + return Enum##_16; \ + } + + switch (Opc) { + OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) + OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) + } + +#undef OP_TO_LIBCALL + + return UNKNOWN_LIBCALL; +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { @@ -695,12 +733,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The TargetMachine owns TLOF. -TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) - : TM(tm), DL(TM.getDataLayout()) { +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - IsLittleEndian = DL->isLittleEndian(); + IsLittleEndian = getDataLayout()->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -792,58 +829,21 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ConstantFP, MVT::f128, Expand); // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FMINNUM, MVT::f16, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FROUND, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FMINNUM, MVT::f32, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FROUND, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FMINNUM, MVT::f64, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FROUND, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f128, Expand); - setOperationAction(ISD::FLOG2, MVT::f128, Expand); - setOperationAction(ISD::FLOG10, MVT::f128, Expand); - setOperationAction(ISD::FEXP , MVT::f128, Expand); - setOperationAction(ISD::FEXP2, MVT::f128, Expand); - setOperationAction(ISD::FFLOOR, MVT::f128, Expand); - setOperationAction(ISD::FMINNUM, MVT::f128, Expand); - setOperationAction(ISD::FMAXNUM, MVT::f128, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); - setOperationAction(ISD::FCEIL, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::FROUND, MVT::f128, Expand); + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FLOG , VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP , VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + } // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -859,7 +859,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { } unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { - return DL->getPointerSizeInBits(AS); + return getDataLayout()->getPointerSizeInBits(AS); } unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { @@ -868,7 +868,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { } MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*DL->getPointerSize(0)); + return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0)); } EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { @@ -1144,6 +1144,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". +// This function is in TargetLowering because it uses RegClassForVT which would +// need to be moved to TargetRegisterInfo and would necessitate moving +// isTypeLegal over as well - a massive change that would just require +// TargetLowering having a TargetRegisterInfo class member that it would use. std::pair<const TargetRegisterClass *, uint8_t> TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { @@ -1498,7 +1502,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return DL->getABITypeAlignment(Ty); + return getDataLayout()->getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index c1b34f7..bcf2aa7 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -244,22 +245,9 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro"; } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler &Mang, const TargetMachine &TM) const { - unsigned Flags = getELFSectionFlags(Kind); - - // If we have -ffunction-section or -fdata-section then we should emit the - // global value to a uniqued section specifically for it. 
-  bool EmitUniqueSection = false;
-  if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
-    if (Kind.isText())
-      EmitUniqueSection = TM.getFunctionSections();
-    else
-      EmitUniqueSection = TM.getDataSections();
-  }
-  EmitUniqueSection |= GV->hasComdat();
-
+static const MCSectionELF *selectELFSectionForGlobal(
+    MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) {
   unsigned EntrySize = 0;
   if (Kind.isMergeableCString()) {
     if (Kind.isMergeable2ByteCString()) {
@@ -309,9 +297,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     Name.push_back('.');
     TM.getNameWithPrefix(Name, GV, Mang, true);
   }
-  return getContext().getELFSection(Name, getELFSectionType(Name, Kind), Flags,
-                                    EntrySize, Group,
-                                    EmitUniqueSection && !UniqueSectionNames);
+  return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+                           EntrySize, Group,
+                           EmitUniqueSection && !UniqueSectionNames);
+}
+
+const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM) const {
+  unsigned Flags = getELFSectionFlags(Kind);
+
+  // If we have -ffunction-sections or -fdata-sections then we should emit the
+  // global value to a uniqued section specifically for it.
+  bool EmitUniqueSection = false;
+  if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+    if (Kind.isText())
+      EmitUniqueSection = TM.getFunctionSections();
+    else
+      EmitUniqueSection = TM.getDataSections();
+  }
+  EmitUniqueSection |= GV->hasComdat();
+
+  return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+                                   EmitUniqueSection, Flags);
 }
 
 const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -323,7 +331,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
   if (!EmitUniqueSection)
     return ReadOnlySection;
 
-  return SelectSectionForGlobal(&F, SectionKind::getReadOnly(), Mang, TM);
+  return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+                                   Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC);
 }
 
 bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -423,6 +432,11 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
 // MachO
 //===----------------------------------------------------------------------===//
 
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+    : TargetLoweringObjectFile() {
+  SupportIndirectSymViaGOTPCRel = true;
+}
+
 /// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
 /// option string. Returns StringRef() if the option does not specify a library.
 StringRef TargetLoweringObjectFileMachO::
@@ -697,6 +711,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
   return SSym;
 }
 
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+    const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+    MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+  // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+  // as 64-bit do, we replace the GOT equivalent by accessing the final symbol
+  // through a non_lazy_ptr stub instead. One advantage is that it allows the
+  // computation of deltas to final external symbols. Example:
+  //
+  //    _extgotequiv:
+  //       .long   _extfoo
+  //
+  //    _delta:
+  //       .long   _extgotequiv-_delta
+  //
+  // is transformed to:
+  //
+  //    _delta:
+  //       .long   L_extfoo$non_lazy_ptr-(_delta+0)
+  //
+  //       .section        __IMPORT,__pointers,non_lazy_symbol_pointers
+  //    L_extfoo$non_lazy_ptr:
+  //       .indirect_symbol        _extfoo
+  //       .long   0
+  //
+  MachineModuleInfoMachO &MachOMMI =
+      MMI->getObjFileInfo<MachineModuleInfoMachO>();
+  MCContext &Ctx = getContext();
+
+  // The offset must consider the original displacement from the base symbol
+  // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+  Offset = -MV.getConstant();
+  const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+  // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+  // non_lazy_ptr stubs.
+  SmallString<128> Name;
+  StringRef Suffix = "$non_lazy_ptr";
+  Name += DL->getPrivateGlobalPrefix();
+  Name += Sym->getName();
+  Name += Suffix;
+  MCSymbol *Stub = Ctx.GetOrCreateSymbol(Name);
+
+  MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+  if (!StubSym.getPointer())
+    StubSym = MachineModuleInfoImpl::
+        StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+
+  const MCExpr *BSymExpr =
+      MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+  const MCExpr *LHS =
+      MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
+  if (!Offset)
+    return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx);
+
+  const MCExpr *RHS =
+      MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx);
+  return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+}
+
 //===----------------------------------------------------------------------===//
 // COFF
 //===----------------------------------------------------------------------===//
@@ -853,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     StringRef COMDATSymName = Sym->getName();
     return getContext().getCOFFSection(Name, Characteristics, Kind,
                                        COMDATSymName, Selection);
+  } else {
+    SmallString<256> TmpData;
+    getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+    return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+                                       Selection);
+  }
 }
 
@@ -874,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   return DataSection;
 }
 
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+    SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+    bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+  if (GV->hasPrivateLinkage() &&
+      ((isa<Function>(GV) && TM.getFunctionSections()) ||
+       (isa<GlobalVariable>(GV) && TM.getDataSections())))
+    CannotUsePrivateLabel = true;
+
+  Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+    const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+  // If the function can be removed, produce a unique section so that
+  // the table doesn't prevent the removal.
+  const Comdat *C = F.getComdat();
+  bool EmitUniqueSection = TM.getFunctionSections() || C;
+  if (!EmitUniqueSection)
+    return ReadOnlySection;
+
+  // FIXME: we should produce a symbol for F instead.
+  if (F.hasPrivateLinkage())
+    return ReadOnlySection;
+
+  MCSymbol *Sym = TM.getSymbol(&F, Mang);
+  StringRef COMDATSymName = Sym->getName();
+
+  SectionKind Kind = SectionKind::getReadOnly();
+  const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+  unsigned Characteristics = getCOFFSectionFlags(Kind);
+  Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+  return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+                                     COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
 StringRef TargetLoweringObjectFileCOFF::
 getDepLibFromLinkerOpt(StringRef LinkerOption) const {
   const char *LibCmd = "/DEFAULTLIB:";
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1bbe6e1..57daeab 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
                             MachineBasicBlock::iterator OldPos);
 
+  bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
   bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
 
   bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
   return true;
 }
 
+/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg
+/// in the current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+                                  const MachineRegisterInfo *MRI) {
+  MachineInstr *Ret = nullptr;
+  for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+    if (DefMI.getParent() != BB || DefMI.isDebugValue())
+      continue;
+    if (!Ret)
+      Ret = &DefMI;
+    else if (Ret != &DefMI)
+      return nullptr;
+  }
+  return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// Maxlen specifies the maximum length of the copy chain the function
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+                                               int Maxlen) {
+  unsigned TmpReg = FromReg;
+  for (int i = 0; i < Maxlen; i++) {
+    MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+    if (!Def || !Def->isCopy())
+      return false;
+
+    TmpReg = Def->getOperand(1).getReg();
+
+    if (TmpReg == ToReg)
+      return true;
+  }
+  return false;
+}
+
 /// noUseAfterLastDef - Return true if there are no intervening uses between the
 /// last instruction in the MBB that defines the specified register and the
 /// two-address instruction which is being processed. It also returns the last
@@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
   if (!noUseAfterLastDef(regB, Dist, LastDefB))
     return true;
 
+  // Look for a situation like this:
+  // %reg101 = MOV %reg100
+  // %reg102 = ...
+  // %reg103 = ADD %reg102, %reg101
+  // ... = %reg103 ...
+  // %reg100 = MOV %reg103
+  // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+  // to eliminate an otherwise unavoidable copy.
+  // FIXME:
+  // We can extend the logic further: If a pair of operands in an insn has
+  // been merged, the insn could be regarded as a virtual copy, and the virtual
+  // copy could also be used to construct a copy chain.
+  // To more generally minimize register copies, ideally the logic of the
+  // two-address instruction pass should be integrated with the register
+  // allocation pass, where the interference graph is available.
+  if (isRevCopyChain(regC, regA, 3))
+    return true;
+
+  if (isRevCopyChain(regB, regA, 3))
+    return false;
+
   // Since there are no intervening uses for both registers, then commute
   // if the def of regC is closer. Its live interval is shorter.
   return LastDefB && LastDefC && LastDefC > LastDefB;
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 7d3b0ce..d9adfdf 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -286,7 +286,7 @@ void VirtRegRewriter::addMBBLiveIns() {
 }
 
 void VirtRegRewriter::rewrite() {
-  bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
+  bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<unsigned, 8> SuperDeads;
   SmallVector<unsigned, 8> SuperDefs;
   SmallVector<unsigned, 8> SuperKills;
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6f712a9..ab0f96e 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -16,6 +16,8 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/LibCallSemantics.h"
 #include "llvm/IR/Function.h"
@@ -25,6 +27,10 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <memory>
@@ -36,25 +42,31 @@ using namespace llvm::PatternMatch;
 
 namespace {
 
-struct HandlerAllocas {
-  TinyPtrVector<AllocaInst *> Allocas;
-  int ParentFrameAllocationIndex;
-};
-
 // This map is used to model frame variable usage during outlining, to
 // construct a structure type to hold the frame variables in a frame
 // allocation block, and to remap the frame variable allocas (including
 // spill locations as needed) to GEPs that get the variable from the
 // frame allocation structure.
-typedef MapVector<AllocaInst *, HandlerAllocas> FrameVarInfoMap;
+typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
 
-class WinEHPrepare : public FunctionPass {
-  std::unique_ptr<FunctionPass> DwarfPrepare;
+typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+
+enum ActionType { Catch, Cleanup };
+
+class LandingPadActions;
+class ActionHandler;
+class CatchHandler;
+class CleanupHandler;
+class LandingPadMap;
+typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
+typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+
+class WinEHPrepare : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid.
WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), DwarfPrepare(createDwarfEHPass(TM)) {} + : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -67,11 +79,24 @@ public: } private: - bool prepareCPPEHHandlers(Function &F, - SmallVectorImpl<LandingPadInst *> &LPads); - bool outlineCatchHandler(Function *SrcFn, Constant *SelectorType, - LandingPadInst *LPad, CallInst *&EHAlloc, - AllocaInst *&EHObjPtr, FrameVarInfoMap &VarInfo); + bool prepareExceptionHandlers(Function &F, + SmallVectorImpl<LandingPadInst *> &LPads); + bool outlineHandler(ActionHandler *Action, Function *SrcFn, + LandingPadInst *LPad, BasicBlock *StartBB, + FrameVarInfoMap &VarInfo); + + void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); + CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, + VisitedBlockSet &VisitedBlocks); + CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB); + + void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); + + // All fields are reset by runOnFunction. + EHPersonality Personality; + CatchHandlerMapTy CatchHandlerMap; + CleanupHandlerMapTy CleanupHandlerMap; + DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; }; class WinEHFrameVariableMaterializer : public ValueMaterializer { @@ -87,34 +112,218 @@ private: IRBuilder<> Builder; }; -class WinEHCatchDirector : public CloningDirector { +class LandingPadMap { +public: + LandingPadMap() : OriginLPad(nullptr) {} + void mapLandingPad(const LandingPadInst *LPad); + + bool isInitialized() { return OriginLPad != nullptr; } + + bool mapIfEHPtrLoad(const LoadInst *Load) { + return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs); + } + bool mapIfSelectorLoad(const LoadInst *Load) { + return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs); + } + + bool isLandingPadSpecificInst(const Instruction *Inst) const; + + void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const; + +private: + bool mapIfEHLoad(const LoadInst *Load, + SmallVectorImpl<const StoreInst *> &Stores, + SmallVectorImpl<const Value *> &StoreAddrs); + + const LandingPadInst *OriginLPad; + // We will normally only see one of each of these instructions, but + // if more than one occurs for some reason we can handle that. + TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; + TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; + + // In optimized code, there will typically be at most one instance of + // each of the following, but in unoptimized IR it is not uncommon + // for the values to be stored, loaded and then stored again. In that + // case we will create a second entry for each store and store address. 
+ SmallVector<const StoreInst *, 2> EHPtrStores; + SmallVector<const StoreInst *, 2> SelectorStores; + SmallVector<const Value *, 2> EHPtrStoreAddrs; + SmallVector<const Value *, 2> SelectorStoreAddrs; +}; + +class WinEHCloningDirectorBase : public CloningDirector { public: - WinEHCatchDirector(LandingPadInst *LPI, Function *CatchFn, Value *Selector, - Value *EHObj, FrameVarInfoMap &VarInfo) - : LPI(LPI), CurrentSelector(Selector->stripPointerCasts()), EHObj(EHObj), - Materializer(CatchFn, VarInfo), - SelectorIDType(Type::getInt32Ty(LPI->getContext())), - Int8PtrType(Type::getInt8PtrTy(LPI->getContext())) {} + WinEHCloningDirectorBase(Function *HandlerFn, + FrameVarInfoMap &VarInfo, + LandingPadMap &LPadMap) + : Materializer(HandlerFn, VarInfo), + SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), + Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), + LPadMap(LPadMap) {} CloningAction handleInstruction(ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) override; + virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) = 0; + virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, + const InvokeInst *Invoke, + BasicBlock *NewBB) = 0; + virtual CloningAction handleResume(ValueToValueMapTy &VMap, + const ResumeInst *Resume, + BasicBlock *NewBB) = 0; + ValueMaterializer *getValueMaterializer() override { return &Materializer; } -private: - LandingPadInst *LPI; - Value *CurrentSelector; - Value *EHObj; +protected: WinEHFrameVariableMaterializer Materializer; Type *SelectorIDType; Type *Int8PtrType; + LandingPadMap &LPadMap; +}; + +class WinEHCatchDirector : public WinEHCloningDirectorBase { +public: + WinEHCatchDirector(Function *CatchFn, Value *Selector, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap), + CurrentSelector(Selector->stripPointerCasts()), + ExceptionObjectVar(nullptr) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; + + const Value *getExceptionVar() { return ExceptionObjectVar; } + TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } + +private: + Value *CurrentSelector; - const Value *ExtractedEHPtr; - const Value *ExtractedSelector; - const Value *EHPtrStoreAddr; - const Value *SelectorStoreAddr; + const Value *ExceptionObjectVar; + TinyPtrVector<BasicBlock *> ReturnTargets; }; + +class WinEHCleanupDirector : public WinEHCloningDirectorBase { +public: + WinEHCleanupDirector(Function *CleanupFn, + FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + : WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {} + + CloningAction handleBeginCatch(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction 
handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, + const Instruction *Inst, + BasicBlock *NewBB) override; + CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, + BasicBlock *NewBB) override; + CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, + BasicBlock *NewBB) override; +}; + +class ActionHandler { +public: + ActionHandler(BasicBlock *BB, ActionType Type) + : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {} + + ActionType getType() const { return Type; } + BasicBlock *getStartBlock() const { return StartBB; } + + bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; } + + void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; } + Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; } + +private: + BasicBlock *StartBB; + ActionType Type; + + // Can be either a BlockAddress or a Function depending on the EH personality. + Constant *HandlerBlockOrFunc; +}; + +class CatchHandler : public ActionHandler { +public: + CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB) + : ActionHandler(BB, ActionType::Catch), Selector(Selector), + NextBB(NextBB), ExceptionObjectVar(nullptr) {} + + // Method for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ActionHandler *H) { + return H->getType() == ActionType::Catch; + } + + Constant *getSelector() const { return Selector; } + BasicBlock *getNextBB() const { return NextBB; } + + const Value *getExceptionVar() { return ExceptionObjectVar; } + TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } + + void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; } + void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) { + ReturnTargets = Targets; + } + +private: + Constant *Selector; + BasicBlock *NextBB; + const Value *ExceptionObjectVar; + TinyPtrVector<BasicBlock *> ReturnTargets; +}; + +class CleanupHandler : public ActionHandler { +public: + CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {} + + // Method for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ActionHandler *H) { + return H->getType() == ActionType::Cleanup; + } +}; + +class LandingPadActions { +public: + LandingPadActions() : HasCleanupHandlers(false) {} + + void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } + void insertCleanupHandler(CleanupHandler *Action) { + Actions.push_back(Action); + HasCleanupHandlers = true; + } + + bool includesCleanup() const { return HasCleanupHandlers; } + + SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } + SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } + +private: + // Note that this class does not own the ActionHandler objects in this vector. + // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap + // in the WinEHPrepare class. + SmallVector<ActionHandler *, 4> Actions; + bool HasCleanupHandlers; +}; + } // end anonymous namespace char WinEHPrepare::ID = 0; @@ -125,10 +334,10 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { return new WinEHPrepare(TM); } -static bool isMSVCPersonality(EHPersonality Pers) { - return Pers == EHPersonality::MSVC_Win64SEH || - Pers == EHPersonality::MSVC_CXX; -} +// FIXME: Remove this once the backend can handle the prepared IR. 
+static cl::opt<bool> +SEHPrepare("sehprepare", cl::Hidden, + cl::desc("Prepare functions with SEH personalities")); bool WinEHPrepare::runOnFunction(Function &Fn) { SmallVector<LandingPadInst *, 4> LPads; @@ -145,60 +354,67 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { return false; // Classify the personality to see what kind of preparation we need. - EHPersonality Pers = classifyEHPersonality(LPads.back()->getPersonalityFn()); - - // Delegate through to the DWARF pass if this is unrecognized. - if (!isMSVCPersonality(Pers)) - return DwarfPrepare->runOnFunction(Fn); + Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); - // FIXME: This only returns true if the C++ EH handlers were outlined. - // When that code is complete, it should always return whatever - // prepareCPPEHHandlers returns. - if (Pers == EHPersonality::MSVC_CXX && prepareCPPEHHandlers(Fn, LPads)) - return true; - - // FIXME: SEH Cleanups are unimplemented. Replace them with unreachable. - if (Resumes.empty()) + // Do nothing if this is not an MSVC personality. + if (!isMSVCEHPersonality(Personality)) return false; - for (ResumeInst *Resume : Resumes) { - IRBuilder<>(Resume).CreateUnreachable(); - Resume->eraseFromParent(); + if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) { + // Replace all resume instructions with unreachable. + // FIXME: Remove this once the backend can handle the prepared IR. + for (ResumeInst *Resume : Resumes) { + IRBuilder<>(Resume).CreateUnreachable(); + Resume->eraseFromParent(); + } + return true; } + // If there were any landing pads, prepareExceptionHandlers will make changes. + prepareExceptionHandlers(Fn, LPads); return true; } bool WinEHPrepare::doFinalization(Module &M) { - return DwarfPrepare->doFinalization(M); + return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - DwarfPrepare->getAnalysisUsage(AU); -} +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} -bool WinEHPrepare::prepareCPPEHHandlers( +bool WinEHPrepare::prepareExceptionHandlers( Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { // These containers are used to re-map frame variables that are used in // outlined catch and cleanup handlers. They will be populated as the // handlers are outlined. FrameVarInfoMap FrameVarInfo; - SmallVector<CallInst *, 4> HandlerAllocs; - SmallVector<AllocaInst *, 4> HandlerEHObjPtrs; bool HandlersOutlined = false; + Module *M = F.getParent(); + LLVMContext &Context = M->getContext(); + + // Create a new function to receive the handler contents. + PointerType *Int8PtrType = Type::getInt8PtrTy(Context); + Type *Int32Type = Type::getInt32Ty(Context); + Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); + for (LandingPadInst *LPad : LPads) { // Look for evidence that this landingpad has already been processed. bool LPadHasActionList = false; BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : LPadBB->getInstList()) { - // FIXME: Make this an intrinsic. - if (auto *Call = dyn_cast<CallInst>(&Inst)) - if (Call->getCalledFunction()->getName() == "llvm.eh.actions") { + for (Instruction &Inst : *LPadBB) { + if (auto *IntrinCall = dyn_cast<IntrinsicInst>(&Inst)) { + if (IntrinCall->getIntrinsicID() == Intrinsic::eh_actions) { LPadHasActionList = true; break; } + } + // FIXME: This is here to help with the development of nested landing pad + // outlining. It should be removed when that is finished. 
+ if (isa<UnreachableInst>(Inst)) { + LPadHasActionList = true; + break; + } } // If we've already outlined the handlers for this landingpad, @@ -206,177 +422,244 @@ bool WinEHPrepare::prepareCPPEHHandlers( if (LPadHasActionList) continue; - for (unsigned Idx = 0, NumClauses = LPad->getNumClauses(); Idx < NumClauses; - ++Idx) { - if (LPad->isCatch(Idx)) { - // Create a new instance of the handler data structure in the - // HandlerData vector. - CallInst *EHAlloc = nullptr; - AllocaInst *EHObjPtr = nullptr; - bool Outlined = outlineCatchHandler(&F, LPad->getClause(Idx), LPad, - EHAlloc, EHObjPtr, FrameVarInfo); - if (Outlined) { + LandingPadActions Actions; + mapLandingPadBlocks(LPad, Actions); + + for (ActionHandler *Action : Actions) { + if (Action->hasBeenProcessed()) + continue; + BasicBlock *StartBB = Action->getStartBlock(); + + // SEH doesn't do any outlining for catches. Instead, pass the handler + // basic block addr to llvm.eh.actions and list the block as a return + // target. + if (isAsynchronousEHPersonality(Personality)) { + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + processSEHCatchHandler(CatchAction, StartBB); HandlersOutlined = true; - // These values must be resolved after all handlers have been - // outlined. - if (EHAlloc) - HandlerAllocs.push_back(EHAlloc); - if (EHObjPtr) - HandlerEHObjPtrs.push_back(EHObjPtr); + continue; } - } // End if (isCatch) - } // End for each clause - } // End for each landingpad + } + + if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) { + HandlersOutlined = true; + } + } // End for each Action + + // FIXME: We need a guard against partially outlined functions. + if (!HandlersOutlined) + continue; + + // Replace the landing pad with a new llvm.eh.action based landing pad. + BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB); + assert(!isa<PHINode>(LPadBB->begin())); + Instruction *NewLPad = LPad->clone(); + NewLPadBB->getInstList().push_back(NewLPad); + while (!pred_empty(LPadBB)) { + auto *pred = *pred_begin(LPadBB); + InvokeInst *Invoke = cast<InvokeInst>(pred->getTerminator()); + Invoke->setUnwindDest(NewLPadBB); + } + + // Replace uses of the old lpad in phis with this block and delete the old + // block. + LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB); + LPadBB->getTerminator()->eraseFromParent(); + new UnreachableInst(LPadBB->getContext(), LPadBB); + + // Add a call to describe the actions for this landing pad. + std::vector<Value *> ActionArgs; + for (ActionHandler *Action : Actions) { + // Action codes from docs are: 0 cleanup, 1 catch. + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); + ActionArgs.push_back(CatchAction->getSelector()); + Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar()); + if (EHObj) + ActionArgs.push_back(EHObj); + else + ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType)); + } else { + ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); + } + ActionArgs.push_back(Action->getHandlerBlockOrFunc()); + } + CallInst *Recover = + CallInst::Create(ActionIntrin, ActionArgs, "recover", NewLPadBB); + + // Add an indirect branch listing possible successors of the catch handlers. 
+ IndirectBrInst *Branch = IndirectBrInst::Create(Recover, 0, NewLPadBB); + for (ActionHandler *Action : Actions) { + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { + for (auto *Target : CatchAction->getReturnTargets()) { + Branch->addDestination(Target); + } + } + } + } // End for each landingpad // If nothing got outlined, there is no more processing to be done. if (!HandlersOutlined) return false; - // FIXME: We will replace the landingpad bodies with llvm.eh.actions - // calls and indirect branches here and then delete blocks - // which are no longer reachable. That will get rid of the - // handlers that we have outlined. There is code below - // that looks for allocas with no uses in the parent function. - // That will only happen after the pruning is implemented. - - // Remap the frame variables. - SmallVector<Type *, 2> StructTys; - StructTys.push_back(Type::getInt32Ty(F.getContext())); // EH state - StructTys.push_back(Type::getInt8PtrTy(F.getContext())); // EH object - - // Start the index at two since we always have the above fields at 0 and 1. - int Idx = 2; - - // FIXME: Sort the FrameVarInfo vector by the ParentAlloca size and alignment - // and add padding as necessary to provide the proper alignment. - - // Map the alloca instructions to the corresponding index in the - // frame allocation structure. If any alloca is used only in a single - // handler and is not used in the parent frame after outlining, it will - // be assigned an index of -1, meaning the handler can keep its - // "temporary" alloca and the original alloca can be erased from the - // parent function. If we later encounter this alloca in a second - // handler, we will assign it a place in the frame allocation structure - // at that time. Since the instruction replacement doesn't happen until - // all the entries in the HandlerData have been processed this isn't a - // problem. - for (auto &VarInfoEntry : FrameVarInfo) { - AllocaInst *ParentAlloca = VarInfoEntry.first; - HandlerAllocas &AllocaInfo = VarInfoEntry.second; - - // If the instruction still has uses in the parent function or if it is - // referenced by more than one handler, add it to the frame allocation - // structure. - if (ParentAlloca->getNumUses() != 0 || AllocaInfo.Allocas.size() > 1) { - Type *VarTy = ParentAlloca->getAllocatedType(); - StructTys.push_back(VarTy); - AllocaInfo.ParentFrameAllocationIndex = Idx++; - } else { - // If the variable is not used in the parent frame and it is only used - // in one handler, the alloca can be removed from the parent frame - // and the handler will keep its "temporary" alloca to define the value. - // An element index of -1 is used to indicate this condition. - AllocaInfo.ParentFrameAllocationIndex = -1; - } - } + // Delete any blocks that were only used by handlers that were outlined above. + removeUnreachableBlocks(F); - // Having filled the StructTys vector and assigned an index to each element, - // we can now create the structure. - StructType *EHDataStructTy = StructType::create( - F.getContext(), StructTys, "struct." + F.getName().str() + ".ehdata"); - IRBuilder<> Builder(F.getParent()->getContext()); - - // Create a frame allocation. 
-  Module *M = F.getParent();
-  LLVMContext &Context = M->getContext();
   BasicBlock *Entry = &F.getEntryBlock();
+  IRBuilder<> Builder(F.getParent()->getContext());
   Builder.SetInsertPoint(Entry->getFirstInsertionPt());
-  Function *FrameAllocFn =
-      Intrinsic::getDeclaration(M, Intrinsic::frameallocate);
-  uint64_t EHAllocSize = M->getDataLayout()->getTypeAllocSize(EHDataStructTy);
-  Value *FrameAllocArgs[] = {
-      ConstantInt::get(Type::getInt32Ty(Context), EHAllocSize)};
-  CallInst *FrameAlloc =
-      Builder.CreateCall(FrameAllocFn, FrameAllocArgs, "frame.alloc");
-
-  Value *FrameEHData = Builder.CreateBitCast(
-      FrameAlloc, EHDataStructTy->getPointerTo(), "eh.data");
-
-  // Now visit each handler that is using the structure and bitcast its EHAlloc
-  // value to be a pointer to the frame alloc structure.
-  DenseMap<Function *, Value *> EHDataMap;
-  for (CallInst *EHAlloc : HandlerAllocs) {
-    // The EHAlloc has no uses at this time, so we need to just insert the
-    // cast before the next instruction. There is always a next instruction.
-    BasicBlock::iterator II = EHAlloc;
-    ++II;
-    Builder.SetInsertPoint(cast<Instruction>(II));
-    Value *EHData = Builder.CreateBitCast(
-        EHAlloc, EHDataStructTy->getPointerTo(), "eh.data");
-    EHDataMap[EHAlloc->getParent()->getParent()] = EHData;
-  }
-
-  // Next, replace the place-holder EHObjPtr allocas with GEP instructions
-  // that pull the EHObjPtr from the frame alloc structure
-  for (AllocaInst *EHObjPtr : HandlerEHObjPtrs) {
-    Value *EHData = EHDataMap[EHObjPtr->getParent()->getParent()];
-    Builder.SetInsertPoint(EHObjPtr);
-    Value *ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, 1);
-    EHObjPtr->replaceAllUsesWith(ElementPtr);
-    EHObjPtr->removeFromParent();
-    ElementPtr->takeName(EHObjPtr);
-    delete EHObjPtr;
-  }
+  Function *FrameEscapeFn =
+      Intrinsic::getDeclaration(M, Intrinsic::frameescape);
+  Function *RecoverFrameFn =
+      Intrinsic::getDeclaration(M, Intrinsic::framerecover);
 
   // Finally, replace all of the temporary allocas for frame variables used in
-  // the outlined handlers and the original frame allocas with GEP instructions
-  // that get the equivalent pointer from the frame allocation struct.
+  // the outlined handlers with calls to llvm.framerecover.
+  BasicBlock::iterator II = Entry->getFirstInsertionPt();
+  Instruction *AllocaInsertPt = II;
+  SmallVector<Value *, 8> AllocasToEscape;
   for (auto &VarInfoEntry : FrameVarInfo) {
-    AllocaInst *ParentAlloca = VarInfoEntry.first;
-    HandlerAllocas &AllocaInfo = VarInfoEntry.second;
-    int Idx = AllocaInfo.ParentFrameAllocationIndex;
-
-    // If we have an index of -1 for this instruction, it means it isn't used
-    // outside of this handler. In that case, we just keep the "temporary"
-    // alloca in the handler and erase the original alloca from the parent.
-    if (Idx == -1) {
+    Value *ParentVal = VarInfoEntry.first;
+    TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
+
+    // If the mapped value isn't already an alloca, we need to spill it if it
+    // is a computed value or copy it if it is an argument.
+    AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal);
+    if (!ParentAlloca) {
+      if (auto *Arg = dyn_cast<Argument>(ParentVal)) {
+        // Lower this argument to a copy and then demote that to the stack.
+        // We can't just use the argument location because the handler needs
+        // it to be in the frame allocation block.
+        // Use 'select i1 true, %arg, undef' to simulate a 'no-op' instruction.
+ Value *TrueValue = ConstantInt::getTrue(Context); + Value *UndefValue = UndefValue::get(Arg->getType()); + Instruction *SI = + SelectInst::Create(TrueValue, Arg, UndefValue, + Arg->getName() + ".tmp", AllocaInsertPt); + Arg->replaceAllUsesWith(SI); + // Reset the select operand, because it was clobbered by the RAUW above. + SI->setOperand(1, Arg); + ParentAlloca = DemoteRegToStack(*SI, true, SI); + } else if (auto *PN = dyn_cast<PHINode>(ParentVal)) { + ParentAlloca = DemotePHIToStack(PN, AllocaInsertPt); + } else { + Instruction *ParentInst = cast<Instruction>(ParentVal); + // FIXME: This is a work-around to temporarily handle the case where an + // instruction that is only used in handlers is not sunk. + // Without uses, DemoteRegToStack would just eliminate the value. + // This will fail if ParentInst is an invoke. + if (ParentInst->getNumUses() == 0) { + BasicBlock::iterator InsertPt = ParentInst; + ++InsertPt; + ParentAlloca = + new AllocaInst(ParentInst->getType(), nullptr, + ParentInst->getName() + ".reg2mem", InsertPt); + new StoreInst(ParentInst, ParentAlloca, InsertPt); + } else { + ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst); + } + } + } + + // If the parent alloca is no longer used and only one of the handlers used + // it, erase the parent and leave the copy in the outlined handler. + if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) { ParentAlloca->eraseFromParent(); - } else { - // Otherwise, we replace the parent alloca and all outlined allocas - // which map to it with GEP instructions. - - // First replace the original alloca. - Builder.SetInsertPoint(ParentAlloca); - Builder.SetCurrentDebugLocation(ParentAlloca->getDebugLoc()); - Value *ElementPtr = - Builder.CreateConstInBoundsGEP2_32(FrameEHData, 0, Idx); - ParentAlloca->replaceAllUsesWith(ElementPtr); - ParentAlloca->removeFromParent(); - ElementPtr->takeName(ParentAlloca); - delete ParentAlloca; - - // Next replace all outlined allocas that are mapped to it. - for (AllocaInst *TempAlloca : AllocaInfo.Allocas) { - Value *EHData = EHDataMap[TempAlloca->getParent()->getParent()]; - // FIXME: Sink this GEP into the blocks where it is used. - Builder.SetInsertPoint(TempAlloca); - Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); - ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, Idx); - TempAlloca->replaceAllUsesWith(ElementPtr); - TempAlloca->removeFromParent(); - ElementPtr->takeName(TempAlloca); - delete TempAlloca; + continue; + } + + // Add this alloca to the list of things to escape. + AllocasToEscape.push_back(ParentAlloca); + + // Next replace all outlined allocas that are mapped to it. + for (AllocaInst *TempAlloca : Allocas) { + Function *HandlerFn = TempAlloca->getParent()->getParent(); + // FIXME: Sink this GEP into the blocks where it is used. + Builder.SetInsertPoint(TempAlloca); + Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); + Value *RecoverArgs[] = { + Builder.CreateBitCast(&F, Int8PtrType, ""), + &(HandlerFn->getArgumentList().back()), + llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; + Value *RecoveredAlloca = Builder.CreateCall(RecoverFrameFn, RecoverArgs); + // Add a pointer bitcast if the alloca wasn't an i8. + if (RecoveredAlloca->getType() != TempAlloca->getType()) { + RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); + RecoveredAlloca = + Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType()); } - } // end else of if (Idx == -1) - } // End for each FrameVarInfo entry. 
+      TempAlloca->replaceAllUsesWith(RecoveredAlloca);
+      TempAlloca->removeFromParent();
+      RecoveredAlloca->takeName(TempAlloca);
+      delete TempAlloca;
+    }
+  } // End for each FrameVarInfo entry.
+
+  // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry
+  // block.
+  Builder.SetInsertPoint(&F.getEntryBlock().back());
+  Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
+
+  // Insert an alloca for the EH state in the entry block. On x86, we will also
+  // insert stores to update the EH state, but on other ISAs, the runtime does
+  // it for us.
+  // FIXME: This record is different on x86.
+  Type *UnwindHelpTy = Type::getInt64Ty(Context);
+  AllocaInst *UnwindHelp =
+      new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front());
+  Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp);
+  Function *UnwindHelpFn =
+      Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp);
+  Builder.CreateCall(UnwindHelpFn,
+                     Builder.CreateBitCast(UnwindHelp, Int8PtrType));
+
+  // Clean up the handler action maps we created for this function.
+  DeleteContainerSeconds(CatchHandlerMap);
+  CatchHandlerMap.clear();
+  DeleteContainerSeconds(CleanupHandlerMap);
+  CleanupHandlerMap.clear();

   return HandlersOutlined;
 }

-bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
-                                       LandingPadInst *LPad, CallInst *&EHAlloc,
-                                       AllocaInst *&EHObjPtr,
-                                       FrameVarInfoMap &VarInfo) {
+// This function examines a block to determine whether the block ends with a
+// conditional branch to a catch handler based on a selector comparison.
+// This function is used by both WinEHPrepare::findSelectorComparison() and
+// WinEHCleanupDirector::handleTypeIdFor().
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+                               Constant *&Selector, BasicBlock *&NextBB) {
+  ICmpInst::Predicate Pred;
+  BasicBlock *TBB, *FBB;
+  Value *LHS, *RHS;
+
+  if (!match(BB->getTerminator(),
+             m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
+    return false;
+
+  if (!match(LHS,
+             m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
+      !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
+    return false;
+
+  if (Pred == CmpInst::ICMP_EQ) {
+    CatchHandler = TBB;
+    NextBB = FBB;
+    return true;
+  }
+
+  if (Pred == CmpInst::ICMP_NE) {
+    CatchHandler = FBB;
+    NextBB = TBB;
+    return true;
+  }
+
+  return false;
+}
+
+bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
+                                  LandingPadInst *LPad, BasicBlock *StartBB,
+                                  FrameVarInfoMap &VarInfo) {
   Module *M = SrcFn->getParent();
   LLVMContext &Context = M->getContext();
@@ -385,133 +668,241 @@ bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
   std::vector<Type *> ArgTys;
   ArgTys.push_back(Int8PtrType);
   ArgTys.push_back(Int8PtrType);
-  FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
-  Function *CatchHandler = Function::Create(
-      FnType, GlobalVariable::ExternalLinkage, SrcFn->getName() + ".catch", M);
+  Function *Handler;
+  if (Action->getType() == Catch) {
+    FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
+    Handler = Function::Create(FnType, GlobalVariable::InternalLinkage,
+                               SrcFn->getName() + ".catch", M);
+  } else {
+    FunctionType *FnType =
+        FunctionType::get(Type::getVoidTy(Context), ArgTys, false);
+    Handler = Function::Create(FnType, GlobalVariable::InternalLinkage,
+                               SrcFn->getName() + ".cleanup", M);
+  }

   // Generate a standard prolog to set up the frame recovery structure.
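+  // The handlers created above have prototypes like this (illustrative; the
+  // second i8* argument is the parent frame pointer that llvm.framerecover
+  // expects):
+  //   i8* @parent.catch(i8*, i8*)
+  //   void @parent.cleanup(i8*, i8*)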
   IRBuilder<> Builder(Context);
-  BasicBlock *Entry = BasicBlock::Create(Context, "catch.entry");
-  CatchHandler->getBasicBlockList().push_front(Entry);
+  BasicBlock *Entry = BasicBlock::Create(Context, "entry");
+  Handler->getBasicBlockList().push_front(Entry);
   Builder.SetInsertPoint(Entry);
   Builder.SetCurrentDebugLocation(LPad->getDebugLoc());

-  // The outlined handler will be called with the parent's frame pointer as
-  // its second argument. To enable the handler to access variables from
-  // the parent frame, we use that pointer to get locate a special block
-  // of memory that was allocated using llvm.eh.allocateframe for this
-  // purpose. During the outlining process we will determine which frame
-  // variables are used in handlers and create a structure that maps these
-  // variables into the frame allocation block.
-  //
-  // The frame allocation block also contains an exception state variable
-  // used by the runtime and a pointer to the exception object pointer
-  // which will be filled in by the runtime for use in the handler.
-  Function *RecoverFrameFn =
-      Intrinsic::getDeclaration(M, Intrinsic::framerecover);
-  Value *RecoverArgs[] = {Builder.CreateBitCast(SrcFn, Int8PtrType, ""),
-                          &(CatchHandler->getArgumentList().back())};
-  EHAlloc = Builder.CreateCall(RecoverFrameFn, RecoverArgs, "eh.alloc");
-
-  // This alloca is only temporary. We'll be replacing it once we know all the
-  // frame variables that need to go in the frame allocation structure.
-  EHObjPtr = Builder.CreateAlloca(Int8PtrType, 0, "eh.obj.ptr");
-
-  // This will give us a raw pointer to the exception object, which
-  // corresponds to the formal parameter of the catch statement. If the
-  // handler uses this object, we will generate code during the outlining
-  // process to cast the pointer to the appropriate type and deference it
-  // as necessary. The un-outlined landing pad code represents the
-  // exception object as the result of the llvm.eh.begincatch call.
-  Value *EHObj = Builder.CreateLoad(EHObjPtr, false, "eh.obj");
+  std::unique_ptr<WinEHCloningDirectorBase> Director;

   ValueToValueMapTy VMap;

-  // FIXME: Map other values referenced in the filter handler.
-
-  WinEHCatchDirector Director(LPad, CatchHandler, SelectorType, EHObj, VarInfo);
+  LandingPadMap &LPadMap = LPadMaps[LPad];
+  if (!LPadMap.isInitialized())
+    LPadMap.mapLandingPad(LPad);
+  if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+    Constant *Sel = CatchAction->getSelector();
+    Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap));
+    LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1));
+  } else {
+    Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap));
+  }

   SmallVector<ReturnInst *, 8> Returns;
-  ClonedCodeInfo InlinedFunctionInfo;
+  ClonedCodeInfo OutlinedFunctionInfo;
+
+  // If the start block contains PHI nodes, we need to map them.
+  BasicBlock::iterator II = StartBB->begin();
+  while (auto *PN = dyn_cast<PHINode>(II)) {
+    bool Mapped = false;
+    // Look for PHI values that we have already mapped (such as the selector).
+    for (Value *Val : PN->incoming_values()) {
+      if (VMap.count(Val)) {
+        VMap[PN] = VMap[Val];
+        Mapped = true;
+      }
+    }
+    // If we didn't find a match for this value, map it as an undef.
+    if (!Mapped) {
+      VMap[PN] = UndefValue::get(PN->getType());
+    }
+    ++II;
+  }

-  BasicBlock::iterator II = LPad;
+  // Skip over PHIs and, if applicable, landingpad instructions.
+  II = StartBB->getFirstInsertionPt();

-  CloneAndPruneIntoFromInst(CatchHandler, SrcFn, ++II, VMap,
+  CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap,
                             /*ModuleLevelChanges=*/false, Returns, "",
-                            &InlinedFunctionInfo,
-                            SrcFn->getParent()->getDataLayout(), &Director);
+                            &OutlinedFunctionInfo, Director.get());

   // Move all the instructions in the first cloned block into our entry block.
   BasicBlock *FirstClonedBB = std::next(Function::iterator(Entry));
   Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList());
   FirstClonedBB->eraseFromParent();

+  if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+    WinEHCatchDirector *CatchDirector =
+        reinterpret_cast<WinEHCatchDirector *>(Director.get());
+    CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
+    CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
+  }
+
+  Action->setHandlerBlockOrFunc(Handler);
+
   return true;
 }

-CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
-    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
-  // Intercept instructions which extract values from the landing pad aggregate.
-  if (auto *Extract = dyn_cast<ExtractValueInst>(Inst)) {
-    if (Extract->getAggregateOperand() == LPI) {
-      assert(Extract->getNumIndices() == 1 &&
-             "Unexpected operation: extracting both landing pad values");
-      assert((*(Extract->idx_begin()) == 0 || *(Extract->idx_begin()) == 1) &&
-             "Unexpected operation: extracting an unknown landing pad element");
-
-      if (*(Extract->idx_begin()) == 0) {
-        // Element 0 doesn't directly corresponds to anything in the WinEH
-        // scheme.
-        // It will be stored to a memory location, then later loaded and finally
-        // the loaded value will be used as the argument to an
-        // llvm.eh.begincatch
-        // call. We're tracking it here so that we can skip the store and load.
-        ExtractedEHPtr = Inst;
-      } else {
-        // Element 1 corresponds to the filter selector. We'll map it to 1 for
-        // matching purposes, but it will also probably be stored to memory and
-        // reloaded, so we need to track the instuction so that we can map the
-        // loaded value too.
-        VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
-        ExtractedSelector = Inst;
-      }
-
-      // Tell the caller not to clone this instruction.
-      return CloningDirector::SkipInstruction;
-    }
-    // Other extract value instructions just get cloned.
-    return CloningDirector::CloneInstruction;
+/// This BB must end in a selector dispatch. All we need to do is pass the
+/// handler block to llvm.eh.actions and list it as a possible indirectbr
+/// target.
+void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
+                                          BasicBlock *StartBB) {
+  BasicBlock *HandlerBB;
+  BasicBlock *NextBB;
+  Constant *Selector;
+  bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
+  if (Res) {
+    // If this was EH dispatch, this must be a conditional branch to the handler
+    // block.
+    // FIXME: Handle instructions in the dispatch block. Currently we drop them,
+    // leading to crashes if some optimization hoists stuff here.
+    assert(CatchAction->getSelector() && HandlerBB &&
+           "expected catch EH dispatch");
+  } else {
+    // This must be a catch-all. Split the block after the landingpad.
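+    // A catch-all appears in the landingpad clause list as a null pointer,
+    // e.g. (illustrative): %lpad = landingpad { i8*, i32 } ... catch i8* null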
+    assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
+    HandlerBB =
+        StartBB->splitBasicBlock(StartBB->getFirstInsertionPt(), "catch.all");
   }
+  CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
+  TinyPtrVector<BasicBlock *> Targets(HandlerBB);
+  CatchAction->setReturnTargets(Targets);
+}

-  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
-    // Look for and suppress stores of the extracted landingpad values.
-    const Value *StoredValue = Store->getValueOperand();
-    if (StoredValue == ExtractedEHPtr) {
-      EHPtrStoreAddr = Store->getPointerOperand();
-      return CloningDirector::SkipInstruction;
+void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
+  // Each instance of this class should only ever be used to map a single
+  // landing pad.
+  assert(OriginLPad == nullptr || OriginLPad == LPad);
+
+  // If the landing pad has already been mapped, there's nothing more to do.
+  if (OriginLPad == LPad)
+    return;
+
+  OriginLPad = LPad;
+
+  // The landingpad instruction returns an aggregate value. Typically, its
+  // value will be passed to a pair of extract value instructions and the
+  // results of those extracts are often passed to store instructions.
+  // In unoptimized code the stored value will often be loaded and then stored
+  // again.
+  for (auto *U : LPad->users()) {
+    const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
+    if (!Extract)
+      continue;
+    assert(Extract->getNumIndices() == 1 &&
+           "Unexpected operation: extracting both landing pad values");
+    unsigned int Idx = *(Extract->idx_begin());
+    assert((Idx == 0 || Idx == 1) &&
+           "Unexpected operation: extracting an unknown landing pad element");
+    if (Idx == 0) {
+      // Element 0 doesn't directly correspond to anything in the WinEH
+      // scheme. It will be stored to a memory location, then later loaded,
+      // and finally the loaded value will be used as the argument to an
+      // llvm.eh.begincatch call. We're tracking it here so that we can skip
+      // the store and load.
+      ExtractedEHPtrs.push_back(Extract);
+    } else if (Idx == 1) {
+      // Element 1 corresponds to the filter selector. We'll map it to 1 for
+      // matching purposes, but it will also probably be stored to memory and
+      // reloaded, so we need to track the instruction so that we can map the
+      // loaded value too.
+      ExtractedSelectors.push_back(Extract);
     }
-    if (StoredValue == ExtractedSelector) {
-      SelectorStoreAddr = Store->getPointerOperand();
-      return CloningDirector::SkipInstruction;
+
+    // Look for stores of the extracted values.
+    for (auto *EU : Extract->users()) {
+      if (auto *Store = dyn_cast<StoreInst>(EU)) {
+        if (Idx == 1) {
+          SelectorStores.push_back(Store);
+          SelectorStoreAddrs.push_back(Store->getPointerOperand());
+        } else {
+          EHPtrStores.push_back(Store);
+          EHPtrStoreAddrs.push_back(Store->getPointerOperand());
+        }
+      }
     }
+  }
+}

-    // Any other store just gets cloned.
-    return CloningDirector::CloneInstruction;
+bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
+  if (Inst == OriginLPad)
+    return true;
+  for (auto *Extract : ExtractedEHPtrs) {
+    if (Inst == Extract)
+      return true;
+  }
+  for (auto *Extract : ExtractedSelectors) {
+    if (Inst == Extract)
+      return true;
+  }
+  for (auto *Store : EHPtrStores) {
+    if (Inst == Store)
+      return true;
+  }
+  for (auto *Store : SelectorStores) {
+    if (Inst == Store)
+      return true;
+  }
+
+  return false;
+}
+
+void LandingPadMap::remapSelector(ValueToValueMapTy &VMap,
+                                  Value *MappedValue) const {
+  // Remap all selector extract instructions to the specified value.
+  for (auto *Extract : ExtractedSelectors)
+    VMap[Extract] = MappedValue;
+}
+
+bool LandingPadMap::mapIfEHLoad(const LoadInst *Load,
+                                SmallVectorImpl<const StoreInst *> &Stores,
+                                SmallVectorImpl<const Value *> &StoreAddrs) {
+  // This makes the assumption that a store we've previously seen dominates
+  // this load instruction. That might seem like a rather huge assumption,
+  // but given the way that landingpads are constructed it's fairly safe.
+  // FIXME: Add debug/assert code that verifies this.
+  const Value *LoadAddr = Load->getPointerOperand();
+  for (auto *StoreAddr : StoreAddrs) {
+    if (LoadAddr == StoreAddr) {
+      // Handle the common debug scenario where this loaded value is stored
+      // to a different location.
+      for (auto *U : Load->users()) {
+        if (auto *Store = dyn_cast<StoreInst>(U)) {
+          Stores.push_back(Store);
+          StoreAddrs.push_back(Store->getPointerOperand());
+        }
+      }
+      return true;
+    }
   }
+  return false;
+}
+
+CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
+    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+  // If this is one of the boilerplate landing pad instructions, skip it.
+  // The instruction will have already been remapped in VMap.
+  if (LPadMap.isLandingPadSpecificInst(Inst))
+    return CloningDirector::SkipInstruction;

   if (auto *Load = dyn_cast<LoadInst>(Inst)) {
     // Look for loads of (previously suppressed) landingpad values.
-    // The EHPtr load can be ignored (it should only be used as
-    // an argument to llvm.eh.begincatch), but the selector value
-    // needs to be mapped to a constant value of 1 to be used to
-    // simplify the branching to always flow to the current handler.
-    const Value *LoadAddr = Load->getPointerOperand();
-    if (LoadAddr == EHPtrStoreAddr) {
-      VMap[Inst] = UndefValue::get(Int8PtrType);
+    // The EHPtr load can be mapped to an undef value as it should only be used
+    // as an argument to llvm.eh.begincatch, but the selector value needs to be
+    // mapped to a constant value of 1. This value will be used to simplify the
+    // branching to always flow to the current handler.
+    if (LPadMap.mapIfSelectorLoad(Load)) {
+      VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
       return CloningDirector::SkipInstruction;
     }
-    if (LoadAddr == SelectorStoreAddr) {
-      VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+    if (LPadMap.mapIfEHPtrLoad(Load)) {
+      VMap[Inst] = UndefValue::get(Int8PtrType);
       return CloningDirector::SkipInstruction;
     }
@@ -519,108 +910,576 @@ CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
     return CloningDirector::CloneInstruction;
   }

-  if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) {
-    // The argument to the call is some form of the first element of the
-    // landingpad aggregate value, but that doesn't matter. It isn't used
-    // here.
-    // The return value of this instruction, however, is used to access the
-    // EH object pointer. We have generated an instruction to get that value
-    // from the EH alloc block, so we can just map to that here.
-    VMap[Inst] = EHObj;
-    return CloningDirector::SkipInstruction;
-  }
-  if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) {
-    auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
-    // It might be interesting to track whether or not we are inside a catch
-    // function, but that might make the algorithm more brittle than it needs
-    // to be.
-
-    // The end catch call can occur in one of two places: either in a
-    // landingpad
-    // block that is part of the catch handlers exception mechanism, or at the
-    // end of the catch block. If it occurs in a landing pad, we must skip it
-    // and continue so that the landing pad gets cloned.
-    // FIXME: This case isn't fully supported yet and shouldn't turn up in any
-    // of the test cases until it is.
-    if (IntrinCall->getParent()->isLandingPad())
-      return CloningDirector::SkipInstruction;
-
-    // If an end catch occurs anywhere else the next instruction should be an
-    // unconditional branch instruction that we want to replace with a return
-    // to the the address of the branch target.
-    const BasicBlock *EndCatchBB = IntrinCall->getParent();
-    const TerminatorInst *Terminator = EndCatchBB->getTerminator();
-    const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
-    assert(Branch && Branch->isUnconditional());
-    assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
-           BasicBlock::const_iterator(Branch));
-
-    ReturnInst::Create(NewBB->getContext(),
-                       BlockAddress::get(Branch->getSuccessor(0)), NewBB);
-
-    // We just added a terminator to the cloned block.
-    // Tell the caller to stop processing the current basic block so that
-    // the branch instruction will be skipped.
+  // Nested landing pads will be cloned as stubs, with just the
+  // landingpad instruction and an unreachable instruction. When
+  // all landingpads have been outlined, we'll replace this with the
+  // llvm.eh.actions call and indirect branch created when the
+  // landing pad was outlined.
+  if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) {
+    Instruction *NewInst = NestedLPad->clone();
+    if (NestedLPad->hasName())
+      NewInst->setName(NestedLPad->getName());
+    // FIXME: Store this mapping somewhere else also.
+    VMap[NestedLPad] = NewInst;
+    BasicBlock::InstListType &InstList = NewBB->getInstList();
+    InstList.push_back(NewInst);
+    InstList.push_back(new UnreachableInst(NewBB->getContext()));
     return CloningDirector::StopCloningBB;
   }
-  if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) {
-    auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
-    Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
-    // This causes a replacement that will collapse the landing pad CFG based
-    // on the filter function we intend to match.
-    if (Selector == CurrentSelector)
-      VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
-    else
-      VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
-    // Tell the caller not to clone this instruction.
-    return CloningDirector::SkipInstruction;
-  }
+
+  if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
+    return handleInvoke(VMap, Invoke, NewBB);
+
+  if (auto *Resume = dyn_cast<ResumeInst>(Inst))
+    return handleResume(VMap, Resume, NewBB);
+
+  if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+    return handleBeginCatch(VMap, Inst, NewBB);
+  if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+    return handleEndCatch(VMap, Inst, NewBB);
+  if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+    return handleTypeIdFor(VMap, Inst, NewBB);

   // Continue with the default cloning behavior.
   return CloningDirector::CloneInstruction;
 }

+CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
+    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+  // The argument to the call is some form of the first element of the
+  // landingpad aggregate value, but that doesn't matter. It isn't used
+  // here.
+  // The second argument is an outparameter where the exception object will be
+  // stored. Typically the exception object is a scalar, but it can be an
+  // aggregate when catching by value.
+  // FIXME: Leave something behind to indicate where the exception object lives
+  // for this handler. Should it be part of llvm.eh.actions?
+  assert(ExceptionObjectVar == nullptr && "Multiple calls to "
+                                          "llvm.eh.begincatch found while "
+                                          "outlining catch handler.");
+  ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
+  return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
+                                   const Instruction *Inst, BasicBlock *NewBB) {
+  auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+  // It might be interesting to track whether or not we are inside a catch
+  // function, but that might make the algorithm more brittle than it needs
+  // to be.
+
+  // The end catch call can occur in one of two places: either in a
+  // landingpad block that is part of the catch handler's exception mechanism,
+  // or at the end of the catch block. If it occurs in a landing pad, we must
+  // skip it and continue so that the landing pad gets cloned.
+  // FIXME: This case isn't fully supported yet and shouldn't turn up in any
+  // of the test cases until it is.
+  if (IntrinCall->getParent()->isLandingPad())
+    return CloningDirector::SkipInstruction;
+
+  // If an end catch occurs anywhere else the next instruction should be an
+  // unconditional branch instruction that we want to replace with a return
+  // to the address of the branch target.
+  const BasicBlock *EndCatchBB = IntrinCall->getParent();
+  const TerminatorInst *Terminator = EndCatchBB->getTerminator();
+  const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+  assert(Branch && Branch->isUnconditional());
+  assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
+         BasicBlock::const_iterator(Branch));
+
+  BasicBlock *ContinueLabel = Branch->getSuccessor(0);
+  ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel),
+                     NewBB);
+  ReturnTargets.push_back(ContinueLabel);
+
+  // We just added a terminator to the cloned block.
+  // Tell the caller to stop processing the current basic block so that
+  // the branch instruction will be skipped.
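+  // (Illustrative effect: a trailing 'br label %try.cont' in the source
+  // becomes 'ret i8* blockaddress(@parent.catch, %try.cont)' in the clone.)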
+  return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor(
+    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+  auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+  Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+  // This causes a replacement that will collapse the landing pad CFG based
+  // on the filter function we intend to match.
+  if (Selector == CurrentSelector)
+    VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+  else
+    VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+  // Tell the caller not to clone this instruction.
+  return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap,
+                                 const InvokeInst *Invoke, BasicBlock *NewBB) {
+  return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
+                                 const ResumeInst *Resume, BasicBlock *NewBB) {
+  // Resume instructions shouldn't be reachable from catch handlers.
+  // We still need to handle them, but they will be pruned.
+  BasicBlock::InstListType &InstList = NewBB->getInstList();
+  InstList.push_back(new UnreachableInst(NewBB->getContext()));
+  return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
+    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+  // Catch blocks within cleanup handlers will always be unreachable.
+  // We'll insert an unreachable instruction now, but it will be pruned
+  // before the cloning process is complete.
+  BasicBlock::InstListType &InstList = NewBB->getInstList();
+  InstList.push_back(new UnreachableInst(NewBB->getContext()));
+  return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
+    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+  // Catch blocks within cleanup handlers will always be unreachable.
+  // We'll insert an unreachable instruction now, but it will be pruned
+  // before the cloning process is complete.
+  BasicBlock::InstListType &InstList = NewBB->getInstList();
+  InstList.push_back(new UnreachableInst(NewBB->getContext()));
+  return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
+    ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+  // If we encounter a selector comparison while cloning a cleanup handler,
+  // we want to stop cloning immediately. Anything after the dispatch
+  // will be outlined into a different handler.
+  BasicBlock *CatchHandler;
+  Constant *Selector;
+  BasicBlock *NextBB;
+  if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
+                         CatchHandler, Selector, NextBB)) {
+    ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+    return CloningDirector::StopCloningBB;
+  }
+  // If eh.typeid.for is called for any other reason, it can be ignored.
+  VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+  return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
+    ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
+  // All invokes in cleanup handlers can be replaced with calls.
+  SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
+  // Insert a normal call instruction...
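+  // (Illustrative effect: 'invoke void @f() to label %cont unwind label
+  // %lpad' becomes 'call void @f()' followed by 'br label %cont'.)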
+  CallInst *NewCall =
+      CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
+                       Invoke->getName(), NewBB);
+  NewCall->setCallingConv(Invoke->getCallingConv());
+  NewCall->setAttributes(Invoke->getAttributes());
+  NewCall->setDebugLoc(Invoke->getDebugLoc());
+  VMap[Invoke] = NewCall;
+
+  // Insert an unconditional branch to the normal destination.
+  BranchInst::Create(Invoke->getNormalDest(), NewBB);
+
+  // The unwind destination won't be cloned into the new function, so
+  // we don't need to clean up its phi nodes.
+
+  // We just added a terminator to the cloned block.
+  // Tell the caller to stop processing the current basic block.
+  return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
+    ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
+  ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+
+  // We just added a terminator to the cloned block.
+  // Tell the caller to stop processing the current basic block so that
+  // the branch instruction will be skipped.
+  return CloningDirector::StopCloningBB;
+}
+
 WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
     Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo)
     : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
   Builder.SetInsertPoint(&OutlinedFn->getEntryBlock());
-  // FIXME: Do something with the FrameVarMapped so that it is shared across the
-  // function.
 }

 Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
-  // If we're asked to materialize an alloca variable, we temporarily
-  // create a matching alloca in the outlined function. When all the
-  // outlining is complete, we'll collect these into a structure and
-  // replace these temporary allocas with GEPs referencing the frame
-  // allocation block.
+  // If we're asked to materialize a value that is an instruction, we
+  // temporarily create an alloca in the outlined function and add this
+  // to the FrameVarInfo map. When all the outlining is complete, we'll
+  // collect these into a structure, spilling non-alloca values in the
+  // parent frame as necessary, and replace these temporary allocas with
+  // GEPs referencing the frame allocation block.
+
+  // If the value is an alloca, the mapping is direct.
   if (auto *AV = dyn_cast<AllocaInst>(V)) {
-    AllocaInst *NewAlloca = Builder.CreateAlloca(
-        AV->getAllocatedType(), AV->getArraySize(), AV->getName());
-    FrameVarInfo[AV].Allocas.push_back(NewAlloca);
+    AllocaInst *NewAlloca = dyn_cast<AllocaInst>(AV->clone());
+    Builder.Insert(NewAlloca, AV->getName());
+    FrameVarInfo[AV].push_back(NewAlloca);
     return NewAlloca;
   }

-// FIXME: Do PHI nodes need special handling?
+  // For other types of instructions or arguments, we need an alloca based on
+  // the value's type and a load of the alloca. The alloca will be replaced
+  // by a GEP, but the load will stay. In the parent function, the value will
+  // be spilled to a location in the frame allocation block.
+  if (isa<Instruction>(V) || isa<Argument>(V)) {
+    AllocaInst *NewAlloca =
+        Builder.CreateAlloca(V->getType(), nullptr, "eh.temp.alloca");
+    FrameVarInfo[V].push_back(NewAlloca);
+    LoadInst *NewLoad = Builder.CreateLoad(NewAlloca, V->getName() + ".reload");
+    return NewLoad;
+  }

-// FIXME: Are there other cases we can handle better? GEP, ExtractValue, etc.
+  // Don't materialize other values.
+  return nullptr;
+}

-// FIXME: This doesn't work during cloning because it finds an instruction
-// in the use list that isn't yet part of a basic block.
-#if 0
-  // If we're asked to remap some other instruction, we'll need to
-  // spill it to an alloca variable in the parent function and add a
-  // temporary alloca in the outlined function to be processed as
-  // described above.
-  Instruction *Inst = dyn_cast<Instruction>(V);
-  if (Inst) {
-    AllocaInst *Spill = DemoteRegToStack(*Inst, true);
-    AllocaInst *NewAlloca = Builder.CreateAlloca(Spill->getAllocatedType(),
-                                                 Spill->getArraySize());
-    FrameVarMap[AV] = NewAlloca;
-    return NewAlloca;
   }
-#endif

+// This function maps the catch and cleanup handlers that are reachable from the
+// specified landing pad. The landing pad sequence will have this basic shape:
+//
+//  <cleanup handler>
+//  <selector comparison>
+//  <catch handler>
+//  <cleanup handler>
+//  <selector comparison>
+//  <catch handler>
+//  <cleanup handler>
+//  ...
+//
+// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
+// any arbitrary control flow, but all paths through the cleanup code must
+// eventually reach the next selector comparison and no path can skip to a
+// different selector comparison, though some paths may terminate abnormally.
+// Therefore, we will use a depth first search from the start of any given
+// cleanup block and stop searching when we find the next selector comparison.
+//
+// If the landingpad instruction does not have a catch clause, we will assume
+// that any instructions other than selector comparisons and catch handlers can
+// be ignored. In practice, these will only be the boilerplate instructions.
+//
+// The catch handlers may also have any control structure, but we are only
+// interested in the start of the catch handlers, so we don't need to actually
+// follow the flow of the catch handlers. The start of the catch handlers can
+// be located from the compare instructions, but they can be skipped in the
+// flow by following the contrary branch.
+void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
+                                       LandingPadActions &Actions) {
+  unsigned int NumClauses = LPad->getNumClauses();
+  unsigned int HandlersFound = 0;
+  BasicBlock *BB = LPad->getParent();
+
+  DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
+
+  if (NumClauses == 0) {
+    // This landing pad contains only cleanup code.
+    CleanupHandler *Action = new CleanupHandler(BB);
+    CleanupHandlerMap[BB] = Action;
+    Actions.insertCleanupHandler(Action);
+    DEBUG(dbgs() << "  Assuming cleanup code in block " << BB->getName()
+                 << "\n");
+    assert(LPad->isCleanup());
+    return;
+  }
+
+  VisitedBlockSet VisitedBlocks;
+
+  while (HandlersFound != NumClauses) {
+    BasicBlock *NextBB = nullptr;
+
+    // See if the clause we're looking for is a catch-all.
+    // If so, the catch begins immediately.
+    if (isa<ConstantPointerNull>(LPad->getClause(HandlersFound))) {
+      // The catch all must occur last.
+      assert(HandlersFound == NumClauses - 1);
+
+      // For C++ EH, check if there is any interesting cleanup code before we
+      // begin the catch. This is important because cleanups cannot rethrow
+      // exceptions but code called from catches can. For SEH, it isn't
+      // important if some finally code before a catch-all is executed out of
+      // line or after recovering from the exception.
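+      // For example (illustrative C++): in
+      //   try { Obj o; mayThrow(); } catch (...) {}
+      // ~Obj() is cleanup code that must run before the catch-all body and,
+      // unlike code called from the catch body, cannot rethrow.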
+      if (Personality == EHPersonality::MSVC_CXX) {
+        if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
+          // Add a cleanup entry to the list.
+          Actions.insertCleanupHandler(CleanupAction);
+          DEBUG(dbgs() << "  Found cleanup code in block "
+                       << CleanupAction->getStartBlock()->getName() << "\n");
+        }
+      }
+
+      // Add the catch handler to the action list.
+      CatchHandler *Action =
+          new CatchHandler(BB, LPad->getClause(HandlersFound), nullptr);
+      CatchHandlerMap[BB] = Action;
+      Actions.insertCatchHandler(Action);
+      DEBUG(dbgs() << "  Catch all handler at block " << BB->getName() << "\n");
+      ++HandlersFound;
+
+      // Once we reach a catch-all, don't expect to hit a resume instruction.
+      BB = nullptr;
+      break;
+    }
+
+    CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
+    // See if there is any interesting code executed before the dispatch.
+    if (auto *CleanupAction =
+            findCleanupHandler(BB, CatchAction->getStartBlock())) {
+      // Add a cleanup entry to the list.
+      Actions.insertCleanupHandler(CleanupAction);
+      DEBUG(dbgs() << "  Found cleanup code in block "
+                   << CleanupAction->getStartBlock()->getName() << "\n");
+    }
+
+    assert(CatchAction);
+    ++HandlersFound;
+
+    // Add the catch handler to the action list.
+    Actions.insertCatchHandler(CatchAction);
+    DEBUG(dbgs() << "  Found catch dispatch in block "
+                 << CatchAction->getStartBlock()->getName() << "\n");
+
+    // Move on to the block after the catch handler.
+    BB = NextBB;
+  }
+
+  // If we didn't wind up in a catch-all, see if there is any interesting code
+  // executed before the resume.
+  if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
+    // Add a cleanup entry to the list.
+    Actions.insertCleanupHandler(CleanupAction);
+    DEBUG(dbgs() << "  Found cleanup code in block "
+                 << CleanupAction->getStartBlock()->getName() << "\n");
+  }
+
+  // It's possible that some optimization moved code into a landingpad that
+  // wasn't previously being used for cleanup. If that happens, we need to
+  // execute that extra code from a cleanup handler.
+  if (Actions.includesCleanup() && !LPad->isCleanup())
+    LPad->setCleanup(true);
+}
+
+// This function searches starting with the input block for the next
+// block that terminates with a branch whose condition is based on a selector
+// comparison. This may be the input block. See the mapLandingPadBlocks
+// comments for a discussion of control flow assumptions.
+//
+CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
+                                             BasicBlock *&NextBB,
+                                             VisitedBlockSet &VisitedBlocks) {
+  // See if we've already found a catch handler; if so, use it.
+  // Call count() first to avoid creating a null entry for blocks
+  // we haven't seen before.
+  if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+    CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
+    NextBB = Action->getNextBB();
+    return Action;
   }

+  // VisitedBlocks applies only to the current search. We still
+  // need to consider blocks that we've visited while mapping other
+  // landing pads.
+  VisitedBlocks.insert(BB);
+
+  BasicBlock *CatchBlock = nullptr;
+  Constant *Selector = nullptr;
+
+  // If this is the first time we've visited this block from any landing pad
+  // look to see if it is a selector dispatch block.
+  if (!CatchHandlerMap.count(BB)) {
+    if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+      CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
+      CatchHandlerMap[BB] = Action;
+      return Action;
+    }
+  }
+
+  // Visit each successor, looking for the dispatch.
+  // FIXME: We expect to find the dispatch quickly, so this will probably
+  // work better as a breadth first search.
+  for (BasicBlock *Succ : successors(BB)) {
+    if (VisitedBlocks.count(Succ))
+      continue;
+
+    CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
+    if (Action)
+      return Action;
+  }
+  return nullptr;
+}
+
+// This is a helper function to combine repeated code from findCleanupHandler.
+static CleanupHandler *
+createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap, BasicBlock *BB) {
+  CleanupHandler *Action = new CleanupHandler(BB);
+  CleanupHandlerMap[BB] = Action;
+  return Action;
+}
+
+// This function searches starting with the input block for the next block that
+// contains code that is not part of a catch handler and would not be eliminated
+// during handler outlining.
+//
+CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
+                                                 BasicBlock *EndBB) {
+  // Here we will skip over the following:
+  //
+  // landing pad prolog:
+  //
+  // Unconditional branches
+  //
+  // Selector dispatch
+  //
+  // Resume pattern
+  //
+  // Anything else marks the start of an interesting block
+
+  BasicBlock *BB = StartBB;
+  // Anything other than an unconditional branch will kick us out of this loop
+  // one way or another.
+  while (BB) {
+    // If we've already scanned this block, don't scan it again. If it is
+    // a cleanup block, there will be an action in the CleanupHandlerMap.
+    // If we've scanned it and it is not a cleanup block, there will be a
+    // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
+    // be no entry in the CleanupHandlerMap. We must call count() first to
+    // avoid creating a null entry for blocks we haven't scanned.
+    if (CleanupHandlerMap.count(BB)) {
+      if (auto *Action = CleanupHandlerMap[BB]) {
+        return cast<CleanupHandler>(Action);
+      } else {
+        // Here we handle the case where the cleanup handler map contains a
+        // value for this block but the value is a nullptr. This means that
+        // we have previously analyzed the block and determined that it did
+        // not contain any cleanup code. Based on the earlier analysis, we
+        // know that the block must end in either an unconditional branch, a
+        // resume or a conditional branch that is predicated on a comparison
+        // with a selector. Either the resume or the selector dispatch
+        // would terminate the search for cleanup code, so the unconditional
+        // branch is the only case for which we might need to continue
+        // searching.
+        if (BB == EndBB)
+          return nullptr;
+        BasicBlock *SuccBB;
+        if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB)))
+          return nullptr;
+        BB = SuccBB;
+        continue;
+      }
+    }
+
+    // Create an entry in the cleanup handler map for this block. Initially
+    // we create an entry that says this isn't a cleanup block. If we find
+    // cleanup code, the caller will replace this entry.
+    CleanupHandlerMap[BB] = nullptr;
+
+    TerminatorInst *Terminator = BB->getTerminator();
+
+    // Landing pad blocks have extra instructions we need to accept.
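+    // These are the boilerplate extracts and stores recorded by
+    // mapLandingPad(), e.g. (illustrative):
+    //   %exn = extractvalue { i8*, i32 } %lpad, 0
+    //   store i8* %exn, i8** %exn.slot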
+    LandingPadMap *LPadMap = nullptr;
+    if (BB->isLandingPad()) {
+      LandingPadInst *LPad = BB->getLandingPadInst();
+      LPadMap = &LPadMaps[LPad];
+      if (!LPadMap->isInitialized())
+        LPadMap->mapLandingPad(LPad);
+    }
+
+    // Look for the bare resume pattern:
+    //   %exn2 = load i8** %exn.slot
+    //   %sel2 = load i32* %ehselector.slot
+    //   %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0
+    //   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1
+    //   resume { i8*, i32 } %lpad.val2
+    if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
+      InsertValueInst *Insert1 = nullptr;
+      InsertValueInst *Insert2 = nullptr;
+      Value *ResumeVal = Resume->getOperand(0);
+      // If there is only one landingpad, we may use the lpad directly with no
+      // insertions.
+      if (isa<LandingPadInst>(ResumeVal))
+        return nullptr;
+      if (!isa<PHINode>(ResumeVal)) {
+        Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
+        if (!Insert2)
+          return createCleanupHandler(CleanupHandlerMap, BB);
+        Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
+        if (!Insert1)
+          return createCleanupHandler(CleanupHandlerMap, BB);
+      }
+      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+           II != IE; ++II) {
+        Instruction *Inst = II;
+        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+          continue;
+        if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+          continue;
+        if (!Inst->hasOneUse() ||
+            (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
+          return createCleanupHandler(CleanupHandlerMap, BB);
+        }
+      }
+      return nullptr;
+    }
+
+    BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+    if (Branch) {
+      if (Branch->isConditional()) {
+        // Look for the selector dispatch.
+        //   %sel = load i32* %ehselector.slot
+        //   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+        //   %matches = icmp eq i32 %sel12, %2
+        //   br i1 %matches, label %catch14, label %eh.resume
+        CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+        if (!Compare || !Compare->isEquality())
+          return createCleanupHandler(CleanupHandlerMap, BB);
+        for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
+                                  IE = BB->end();
+             II != IE; ++II) {
+          Instruction *Inst = II;
+          if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+            continue;
+          if (Inst == Compare || Inst == Branch)
+            continue;
+          if (!Inst->hasOneUse() || (Inst->user_back() != Compare))
+            return createCleanupHandler(CleanupHandlerMap, BB);
+          if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+            continue;
+          if (!isa<LoadInst>(Inst))
+            return createCleanupHandler(CleanupHandlerMap, BB);
+        }
+        // The selector dispatch block should always terminate our search.
+        assert(BB == EndBB);
+        return nullptr;
+      } else {
+        // Look for empty blocks with unconditional branches.
+        for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
+                                  IE = BB->end();
+             II != IE; ++II) {
+          Instruction *Inst = II;
+          if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+            continue;
+          if (Inst == Branch)
+            continue;
+          if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+            continue;
+          // Anything else makes this interesting cleanup code.
+          return createCleanupHandler(CleanupHandlerMap, BB);
+        }
+        if (BB == EndBB)
+          return nullptr;
+        // The branch was unconditional.
+        BB = Branch->getSuccessor(0);
+        continue;
+      } // End else of if branch was conditional
+    }   // End if Branch
+
+    // Anything else makes this interesting cleanup code.
+    return createCleanupHandler(CleanupHandlerMap, BB);
+  }
   return nullptr;
 }
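The central API change in this file is the move from llvm.frameallocate/llvm.eh.allocateframe to the llvm.frameescape/llvm.framerecover pair. A minimal sketch of that pairing, using the same 3.7-era C++ API as the code above (the helper name recoverInHandler and its parameters are illustrative, not part of this patch):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Sketch: connect one parent-frame alloca to an outlined handler.
    // 'ParentFP' is assumed to be the handler's second argument (the parent
    // frame pointer), matching the convention the code above relies on.
    static Value *recoverInHandler(Module &M, Function *Parent, AllocaInst *Slot,
                                   Value *ParentFP, IRBuilder<> &HandlerBuilder) {
      LLVMContext &Ctx = M.getContext();

      // Parent side: escape the alloca. Its position in the llvm.frameescape
      // argument list (0 here) is the index llvm.framerecover will use.
      Function *EscapeFn = Intrinsic::getDeclaration(&M, Intrinsic::frameescape);
      IRBuilder<> ParentBuilder(Parent->getEntryBlock().getTerminator());
      Value *EscapeArgs[] = {Slot};
      ParentBuilder.CreateCall(EscapeFn, EscapeArgs);

      // Handler side: recover the slot through the parent frame pointer.
      Function *RecoverFn = Intrinsic::getDeclaration(&M, Intrinsic::framerecover);
      Type *Int8PtrTy = Type::getInt8PtrTy(Ctx);
      Value *RecoverArgs[] = {HandlerBuilder.CreateBitCast(Parent, Int8PtrTy),
                              ParentFP,
                              ConstantInt::get(Type::getInt32Ty(Ctx), 0)};
      Value *Raw = HandlerBuilder.CreateCall(RecoverFn, RecoverArgs);
      // llvm.framerecover yields an i8*; cast back to the alloca's pointer type.
      return HandlerBuilder.CreateBitCast(Raw, Slot->getType());
    }

The index passed to llvm.framerecover is simply the position of the alloca in the llvm.frameescape argument list, which is why the patch above records AllocasToEscape.size() - 1 at each recover site before emitting the single frameescape call in the entry block.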