author    Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
committer Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
commit    c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree      81b7dd2bb4370a392f31d332a566c903b5744764 /lib/CodeGen
parent    19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
Update LLVM for rebase to r212749.
Includes a cherry-pick of:
r212948 - fixes a small issue with atomic calls
Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'lib/CodeGen')
62 files changed, 3790 insertions, 1544 deletions
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 6fc83a2..1bdf312 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -7,13 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines several CodeGen-specific LLVM IR analysis utilties.
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -474,8 +475,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
 /// between it and the return.
 ///
 /// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,
-                                const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
   const Instruction *I = CS.getInstruction();
   const BasicBlock *ExitBB = I->getParent();
   const TerminatorInst *Term = ExitBB->getTerminator();
@@ -490,7 +490,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
   // longjmp on x86), it can end up causing miscompilation that has not
   // been fully understood.
   if (!Ret &&
-      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+      (!DAG.getTarget().Options.GuaranteedTailCallOpt ||
       !isa<UnreachableInst>(Term)))
     return false;

@@ -509,7 +509,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
     return false;
   }

-  return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI);
+  return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
+                                         *DAG.getTarget().getTargetLowering());
 }

 bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 7feb42c..05e5c45 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -24,11 +24,13 @@ codegen_SRC_FILES := \
   GCMetadata.cpp \
   GCMetadataPrinter.cpp \
   GCStrategy.cpp \
+  GlobalMerge.cpp \
   IfConversion.cpp \
   InlineSpiller.cpp \
   InterferenceCache.cpp \
   IntrinsicLowering.cpp \
   JITCodeEmitter.cpp \
+  JumpInstrTables.cpp \
   LatencyPriorityQueue.cpp \
   LexicalScopes.cpp \
   LiveDebugVariables.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 1cb0159..251f5ef 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -37,8 +37,7 @@ using namespace llvm;

 ARMException::ARMException(AsmPrinter *A)
-  : DwarfException(A),
-    shouldEmitCFI(false) {}
+  : EHStreamer(A), shouldEmitCFI(false) {}

 ARMException::~ARMException() {}

@@ -100,7 +99,7 @@ void ARMException::endFunction(const MachineFunction *) {
       ATS.emitHandlerData();

       // Emit actual exception table
-      EmitExceptionTable();
+      emitExceptionTable();
     }
   }

@@ -108,7 +107,7 @@ void ARMException::endFunction(const MachineFunction *) {
   ATS.emitFnEnd();
 }

-void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
   const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index f56eb6e..083cc0d 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -1,33 +1,33 @@
 LOCAL_PATH := $(call my-dir)

 codegen_asmprinter_SRC_FILES := \
-  AsmPrinter.cpp
+  AddressPool.cpp \
+  ARMException.cpp \
+  AsmPrinter.cpp \
+  AsmPrinterDwarf.cpp \
+  AsmPrinterInlineAsm.cpp \
+  DbgValueHistoryCalculator.cpp \
+  DIE.cpp \
+  DIEHash.cpp \
+  DwarfAccelTable.cpp \
+  DwarfCFIException.cpp \
+  DwarfDebug.cpp \
+  DwarfFile.cpp \
+  DwarfStringPool.cpp \
+  DwarfUnit.cpp \
+  EHStreamer.cpp \
+  ErlangGCPrinter.cpp \
+  OcamlGCPrinter.cpp \
+  Win64Exception.cpp \
+  WinCodeViewLineTables.cpp
+
+
 # For the host
 # =====================================================
 include $(CLEAR_VARS)

-LOCAL_SRC_FILES := \
-  AddressPool.cpp \
-  AsmPrinter.cpp \
-  AsmPrinterDwarf.cpp \
-  AsmPrinterInlineAsm.cpp \
-  ARMException.cpp \
-  DbgValueHistoryCalculator.cpp \
-  DIE.cpp \
-  DIEHash.cpp \
-  DwarfAccelTable.cpp \
-  DwarfCFIException.cpp \
-  DwarfDebug.cpp \
-  DwarfException.cpp \
-  DwarfFile.cpp \
-  DwarfStringPool.cpp \
-  DwarfUnit.cpp \
-  ErlangGCPrinter.cpp \
-  OcamlGCPrinter.cpp \
-  Win64Exception.cpp \
-  WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
 LOCAL_MODULE:= libLLVMAsmPrinter

 LOCAL_MODULE_TAGS := optional
@@ -41,27 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
 ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)

-LOCAL_SRC_FILES := \
-  AddressPool.cpp \
-  AsmPrinter.cpp \
-  AsmPrinterDwarf.cpp \
-  AsmPrinterInlineAsm.cpp \
-  ARMException.cpp \
-  DbgValueHistoryCalculator.cpp \
-  DIE.cpp \
-  DIEHash.cpp \
-  DwarfAccelTable.cpp \
-  DwarfCFIException.cpp \
-  DwarfDebug.cpp \
-  DwarfException.cpp \
-  DwarfFile.cpp \
-  DwarfStringPool.cpp \
-  DwarfUnit.cpp \
-  ErlangGCPrinter.cpp \
-  OcamlGCPrinter.cpp \
-  Win64Exception.cpp \
-  WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
 LOCAL_MODULE:= libLLVMAsmPrinter

 LOCAL_MODULE_TAGS := optional
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7de9c6d..f80fdea 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -46,7 +47,6 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -232,23 +232,23 @@ bool AsmPrinter::doInitialization(Module &M) {
     }
   }

-  DwarfException *DE = nullptr;
+  EHStreamer *ES = nullptr;
   switch (MAI->getExceptionHandlingType()) {
   case ExceptionHandling::None:
     break;
   case ExceptionHandling::SjLj:
   case ExceptionHandling::DwarfCFI:
-    DE = new DwarfCFIException(this);
+    ES = new DwarfCFIException(this);
     break;
   case ExceptionHandling::ARM:
-    DE = new ARMException(this);
+    ES = new ARMException(this);
     break;
-  case ExceptionHandling::Win64:
-    DE = new Win64Exception(this);
+  case ExceptionHandling::WinEH:
+    ES = new Win64Exception(this);
     break;
   }
-  if (DE)
-    Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName));
+  if (ES)
+    Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
   return false;
 }

@@ -709,13 +709,12 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
 }

 bool
 AsmPrinter::needsSEHMoves() {
-  return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+  return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
     MF->getFunction()->needsUnwindTableEntry();
 }

 void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
-  ExceptionHandling::ExceptionsType ExceptionHandlingType =
-      MAI->getExceptionHandlingType();
+  ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
   if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
       ExceptionHandlingType != ExceptionHandling::ARM)
     return;
@@ -870,6 +869,8 @@ void AsmPrinter::EmitFunctionBody() {
   OutStreamer.AddBlankLine();
 }

+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP);
+
 bool AsmPrinter::doFinalization(Module &M) {
   // Emit global variables.
   for (const auto &G : M.globals())
@@ -887,6 +888,54 @@ bool AsmPrinter::doFinalization(Module &M) {
     EmitVisibility(Name, V, false);
   }

+  // Get information about jump-instruction tables to print.
+  JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
+
+  if (JITI && !JITI->getTables().empty()) {
+    unsigned Arch = Triple(getTargetTriple()).getArch();
+    bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
+    MCInst TrapInst;
+    TM.getInstrInfo()->getTrap(TrapInst);
+    for (const auto &KV : JITI->getTables()) {
+      uint64_t Count = 0;
+      for (const auto &FunPair : KV.second) {
+        // Emit the function labels to make this be a function entry point.
+        MCSymbol *FunSym =
+          OutContext.GetOrCreateSymbol(FunPair.second->getName());
+        OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
+        // FIXME: JumpTableInstrInfo should store information about the required
+        // alignment of table entries and the size of the padding instruction.
+        EmitAlignment(3);
+        if (IsThumb)
+          OutStreamer.EmitThumbFunc(FunSym);
+        if (MAI->hasDotTypeDotSizeDirective())
+          OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
+        OutStreamer.EmitLabel(FunSym);
+
+        // Emit the jump instruction to transfer control to the original
+        // function.
+        MCInst JumpToFun;
+        MCSymbol *TargetSymbol =
+          OutContext.GetOrCreateSymbol(FunPair.first->getName());
+        const MCSymbolRefExpr *TargetSymRef =
+          MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
+                                  OutContext);
+        TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+        OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
+        ++Count;
+      }
+
+      // Emit enough padding instructions to fill up to the next power of two.
+      // This assumes that the trap instruction takes 8 bytes or fewer.
+      uint64_t Remaining = NextPowerOf2(Count) - Count;
+      for (uint64_t C = 0; C < Remaining; ++C) {
+        EmitAlignment(3);
+        OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
+      }
+
+    }
+  }
+
   // Emit module flags.
   SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
   M.getModuleFlagsMetadata(ModuleFlags);
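[A note on the padding rule in the hunk above: llvm::NextPowerOf2 returns the next power of two strictly greater than its argument, so even a table whose entry count is already a power of two is padded up to the next size. A self-contained sketch of that arithmetic, reimplementing the helper purely for illustration (plain C++, not the LLVM API):]

#include <cstdint>
#include <iostream>

// Next power of two strictly greater than A (mirrors llvm::NextPowerOf2).
static uint64_t nextPowerOf2(uint64_t A) {
  A |= (A >> 1);  A |= (A >> 2);  A |= (A >> 4);
  A |= (A >> 8);  A |= (A >> 16); A |= (A >> 32);
  return A + 1;
}

int main() {
  // A jump table with Count entries gets Remaining trap-instruction pads,
  // so its total entry count becomes a power of two: 5 -> 3 pads (8 slots),
  // and even 8 -> 8 pads (16 slots) under the strictly-greater rule.
  for (uint64_t Count : {1ull, 3ull, 5ull, 8ull}) {
    uint64_t Remaining = nextPowerOf2(Count) - Count;
    std::cout << Count << " entries -> " << Remaining << " padding slots\n";
  }
  return 0;
}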
@@ -932,10 +981,6 @@ bool AsmPrinter::doFinalization(Module &M) {
   for (const auto &Alias : M.aliases()) {
     MCSymbol *Name = getSymbol(&Alias);

-    const GlobalValue *GV = Alias.getAliasee();
-    assert(!GV->isDeclaration());
-    MCSymbol *Target = getSymbol(GV);
-
     if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
       OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
     else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
@@ -947,7 +992,7 @@ bool AsmPrinter::doFinalization(Module &M) {

     // Emit the directives as assignments aka .set:
     OutStreamer.EmitAssignment(Name,
-                               MCSymbolRefExpr::Create(Target, OutContext));
+                               lowerConstant(Alias.getAliasee(), *this));
   }

@@ -1248,7 +1293,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
   }

   // Ignore debug and non-emitted data.  This handles llvm.compiler.used.
-  if (GV->getSection() == "llvm.metadata" ||
+  if (StringRef(GV->getSection()) == "llvm.metadata" ||
       GV->hasAvailableExternallyLinkage())
     return true;

@@ -1350,14 +1395,17 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
   for (Structor &S : Structors) {
     const TargetLoweringObjectFile &Obj = getObjFileLowering();
     const MCSymbol *KeySym = nullptr;
-    const MCSection *KeySec = nullptr;
-    if (S.ComdatKey) {
-      KeySym = getSymbol(S.ComdatKey);
-      KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM);
+    if (GlobalValue *GV = S.ComdatKey) {
+      if (GV->hasAvailableExternallyLinkage())
+        // If the associated variable is available_externally, some other TU
+        // will provide its dynamic initializer.
+        continue;
+
+      KeySym = getSymbol(GV);
     }
     const MCSection *OutputSection =
-        (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec)
-                : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec));
+        (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+                : Obj.getStaticDtorSection(S.Priority, KeySym));
     OutStreamer.SwitchSection(OutputSection);
     if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
       EmitAlignment(Align);
@@ -1817,7 +1865,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
     SmallString<8> StrVal;
     CFP->getValueAPF().toString(StrVal);

-    CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+    if (CFP->getType())
+      CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+    else
+      AP.OutStreamer.GetCommentOS() << "Printing <null> Type";
     AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
   }

@@ -1830,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
   // PPC's long double has odd notions of endianness compared to how LLVM
   // handles it: p[0] goes first for *big* endian on PPC.
-  if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+  if (AP.TM.getDataLayout()->isBigEndian() &&
+      !CFP->getType()->isPPC_FP128Ty()) {
     int Chunk = API.getNumWords() - 1;

     if (TrailingBytes)
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index b4ef185..f555f21 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -10,10 +10,10 @@ add_llvm_library(LLVMAsmPrinter
   DwarfAccelTable.cpp
   DwarfCFIException.cpp
   DwarfDebug.cpp
-  DwarfException.cpp
   DwarfFile.cpp
   DwarfStringPool.cpp
   DwarfUnit.cpp
+  EHStreamer.cpp
   ErlangGCPrinter.cpp
   OcamlGCPrinter.cpp
   Win64Exception.cpp
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 6103254..a66d08e 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 #include <map>
+#include <set>

 #define DEBUG_TYPE "dwarfdebug"

@@ -110,45 +111,73 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
   RegVars.erase(I);
 }

-// \brief Terminate location ranges for all variables, described by registers
-// clobbered by @MI.
-static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
-                                const MachineInstr &MI,
-                                const TargetRegisterInfo *TRI,
-                                DbgValueHistoryMap &HistMap) {
+// \brief Collect all registers clobbered by @MI and insert them to @Regs.
+static void collectClobberedRegisters(const MachineInstr &MI,
+                                      const TargetRegisterInfo *TRI,
+                                      std::set<unsigned> &Regs) {
   for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || !MO.isDef() || !MO.getReg())
       continue;
-    for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
-         ++AI) {
-      unsigned RegNo = *AI;
-      clobberRegisterUses(RegVars, RegNo, HistMap, MI);
-    }
+    for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+      Regs.insert(*AI);
   }
 }

-// \brief Terminate the location range for all register-described variables
-// by inserting @ClobberingInstr to their history.
-static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars,
-                                    DbgValueHistoryMap &HistMap,
-                                    const MachineInstr &ClobberingInstr) {
-  for (const auto &I : RegVars)
-    for (const auto &Var : I.second)
-      HistMap.endInstrRange(Var, ClobberingInstr);
-  RegVars.clear();
+// \brief Returns the first instruction in @MBB which corresponds to
+// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
+  auto LastMI = MBB.getLastNonDebugInstr();
+  if (LastMI == MBB.end() || !LastMI->isReturn())
+    return nullptr;
+  // Assume that epilogue starts with instruction having the same debug location
+  // as the return instruction.
+  DebugLoc LastLoc = LastMI->getDebugLoc();
+  auto Res = LastMI;
+  for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend();
+       ++I) {
+    if (I->getDebugLoc() != LastLoc)
+      return Res;
+    Res = std::prev(I.base());
+  }
+  // If all instructions have the same debug location, assume whole MBB is
+  // an epilogue.
+  return MBB.begin();
+}
+
+// \brief Collect registers that are modified in the function body (their
+// contents is changed only in the prologue and epilogue).
+static void collectChangingRegs(const MachineFunction *MF,
+                                const TargetRegisterInfo *TRI,
+                                std::set<unsigned> &Regs) {
+  for (const auto &MBB : *MF) {
+    auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
+    bool IsInEpilogue = false;
+    for (const auto &MI : MBB) {
+      IsInEpilogue |= &MI == FirstEpilogueInst;
+      if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue)
+        collectClobberedRegisters(MI, TRI, Regs);
+    }
+  }
 }

 void calculateDbgValueHistory(const MachineFunction *MF,
                               const TargetRegisterInfo *TRI,
                               DbgValueHistoryMap &Result) {
-  RegDescribedVarsMap RegVars;
+  std::set<unsigned> ChangingRegs;
+  collectChangingRegs(MF, TRI, ChangingRegs);

+  RegDescribedVarsMap RegVars;
   for (const auto &MBB : *MF) {
     for (const auto &MI : MBB) {
       if (!MI.isDebugValue()) {
         // Not a DBG_VALUE instruction. It may clobber registers which describe
         // some variables.
-        clobberRegisterUses(RegVars, MI, TRI, Result);
+        std::set<unsigned> MIClobberedRegs;
+        collectClobberedRegisters(MI, TRI, MIClobberedRegs);
+        for (unsigned RegNo : MIClobberedRegs) {
+          if (ChangingRegs.count(RegNo))
+            clobberRegisterUses(RegVars, RegNo, Result, MI);
+        }
         continue;
       }

@@ -167,8 +196,10 @@ void calculateDbgValueHistory(const MachineFunction *MF,
     // Make sure locations for register-described variables are valid only
     // until the end of the basic block (unless it's the last basic block, in
     // which case let their liveness run off to the end of the function).
-    if (!MBB.empty() && &MBB != &MF->back())
-      clobberAllRegistersUses(RegVars, Result, MBB.back());
+    if (!MBB.empty() && &MBB != &MF->back()) {
+      for (unsigned RegNo : ChangingRegs)
+        clobberRegisterUses(RegVars, RegNo, Result, MBB.back());
+    }
   }
 }
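[The epilogue heuristic in getFirstEpilogueInst above, scanning backwards from a trailing return while debug locations match, can be restated on a simplified instruction type. A standalone sketch; Instr and DebugLocId are hypothetical stand-ins for MachineInstr and DebugLoc, not LLVM types:]

#include <vector>

// Hypothetical stand-in for MachineInstr: a debug-location id plus a
// return flag.
struct Instr { int DebugLocId; bool IsReturn; };

// Walk backwards from a trailing return while instructions share its debug
// location; the earliest such instruction is taken as the epilogue start.
// Returns -1 when the block doesn't end in a return (no epilogue).
int firstEpilogueIndex(const std::vector<Instr> &Block) {
  if (Block.empty() || !Block.back().IsReturn)
    return -1;
  int Loc = Block.back().DebugLocId;
  int Res = static_cast<int>(Block.size()) - 1;
  for (int I = Res - 1; I >= 0; --I) {
    if (Block[I].DebugLocId != Loc)
      return Res;  // first mismatch: the epilogue starts just after it
    Res = I;
  }
  return 0;        // whole block shares the location: all epilogue
}

int main() {
  // prologue (loc 1), body (loc 2), then epilogue + return (loc 3) -> index 2.
  std::vector<Instr> Block = {{1, false}, {2, false}, {3, false}, {3, true}};
  return firstEpilogueIndex(Block) == 2 ? 0 : 1;
}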
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 30312ac..74215aa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -40,9 +40,8 @@ using namespace llvm;

 DwarfCFIException::DwarfCFIException(AsmPrinter *A)
-  : DwarfException(A),
-    shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
-    moveTypeModule(AsmPrinter::CFI_M_None) {}
+  : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+    shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}

 DwarfCFIException::~DwarfCFIException() {}

@@ -59,26 +58,16 @@ void DwarfCFIException::endModule() {

   unsigned PerEncoding = TLOF.getPersonalityEncoding();

-  if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+  if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
     return;

   // Emit references to all used personality functions
-  bool AtLeastOne = false;
   const std::vector<const Function*> &Personalities = MMI->getPersonalities();
   for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
     if (!Personalities[i])
       continue;
     MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
     TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
-    AtLeastOne = true;
-  }
-
-  if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
-    // This is a temporary hack to keep sections in the same order they
-    // were before. This lets us produce bit identical outputs while
-    // transitioning to CFI.
-    Asm->OutStreamer.SwitchSection(
-        const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
   }
 }

@@ -123,9 +112,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
     TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
   Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);

-  Asm->OutStreamer.EmitDebugLabel
-    (Asm->GetTempSymbol("eh_func_begin",
-                        Asm->getFunctionNumber()));
+  MCSymbol *EHBegin =
+      Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+  if (Asm->MAI->useAssignmentForEHBegin()) {
+    MCContext &Ctx = Asm->OutContext;
+    MCSymbol *CurPos = Ctx.CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(CurPos);
+    Asm->OutStreamer.EmitAssignment(EHBegin,
+                                    MCSymbolRefExpr::Create(CurPos, Ctx));
+  } else {
+    Asm->OutStreamer.EmitLabel(EHBegin);
+  }

   // Provide LSDA information.
   if (!shouldEmitLSDA)
@@ -153,5 +150,5 @@ void DwarfCFIException::endFunction(const MachineFunction *) {
   // Map all labels and get rid of any dead landing pads.
   MMI->TidyLandingPads();

-  EmitExceptionTable();
+  emitExceptionTable();
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 2a0615d..77860c0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
            clEnumVal(Disable, "Disabled"), clEnumValEnd),
            cl::init(Default));

-static cl::opt<unsigned>
-DwarfVersionNumber("dwarf-version", cl::Hidden,
-                   cl::desc("Generate DWARF for dwarf version."), cl::init(0));
-
 static const char *const DWARFGroupName = "DWARF Emission";
 static const char *const DbgTimerName = "DWARF Debug Writer";

@@ -209,9 +205,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   else
     HasDwarfPubSections = DwarfPubSections == Enable;

+  unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
   DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
                                     : MMI->getModule()->getDwarfVersion();

+  Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
+
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
     beginModule();
@@ -531,8 +530,7 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
   // shouldn't be found by lookup.
   AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
                                  DIDescriptor());
-  SPCU.applySubprogramAttributes(SP, *AbsDef);
-  SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext()));
+  SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef);
   SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
   createAndAddScopeChildren(SPCU, Scope, *AbsDef);

@@ -732,6 +730,8 @@ void DwarfDebug::beginModule() {

   const Module *M = MMI->getModule();

+  FunctionDIs = makeSubprogramMap(*M);
+
   // If module has named metadata anchors then use them, otherwise scan the
   // module using debug info finder to collect debug info.
   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -784,6 +784,26 @@ void DwarfDebug::beginModule() {
       SectionMap[Asm->getObjFileLowering().getTextSection()];
 }

+void DwarfDebug::finishVariableDefinitions() {
+  for (const auto &Var : ConcreteVariables) {
+    DIE *VariableDie = Var->getDIE();
+    // FIXME: There shouldn't be any variables without DIEs.
+    if (!VariableDie)
+      continue;
+    // FIXME: Consider the time-space tradeoff of just storing the unit pointer
+    // in the ConcreteVariables list, rather than looking it up again here.
+    // DIE::getUnit isn't simple - it walks parent pointers, etc.
+    DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+    assert(Unit);
+    DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable());
+    if (AbsVar && AbsVar->getDIE()) {
+      Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+                        *AbsVar->getDIE());
+    } else
+      Unit->applyVariableAttributes(*Var, *VariableDie);
+  }
+}
+
 void DwarfDebug::finishSubprogramDefinitions() {
   const Module *M = MMI->getModule();

@@ -811,8 +831,7 @@ void DwarfDebug::finishSubprogramDefinitions() {
           // inlined versions during codegen.
           D = SPCU->getOrCreateSubprogramDIE(SP);
         // And attach the attributes
-        SPCU->applySubprogramAttributes(SP, *D);
-        SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext()));
+        SPCU->applySubprogramAttributesToDefinition(SP, *D);
       }
     }
   }
@@ -850,8 +869,10 @@ void DwarfDebug::collectDeadVariables() {
         for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
           DIVariable DV(Variables.getElement(vi));
           assert(DV.isVariable());
-          DbgVariable NewVar(DV, nullptr, this);
-          SPDIE->addChild(SPCU->constructVariableDIE(NewVar));
+          DbgVariable NewVar(DV, this);
+          auto VariableDie = SPCU->constructVariableDIE(NewVar);
+          SPCU->applyVariableAttributes(NewVar, *VariableDie);
+          SPDIE->addChild(std::move(VariableDie));
         }
       }
     }
@@ -861,6 +882,8 @@ void DwarfDebug::collectDeadVariables() {
 void DwarfDebug::finalizeModuleInfo() {
   finishSubprogramDefinitions();

+  finishVariableDefinitions();
+
   // Collect info for variables that were optimized out.
   collectDeadVariables();

@@ -1017,9 +1040,9 @@ void DwarfDebug::endModule() {
     emitDebugInfoDWO();
     emitDebugAbbrevDWO();
     emitDebugLineDWO();
+    emitDebugLocDWO();
     // Emit DWO addresses.
     AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
-    emitDebugLocDWO();
   } else
     // Emit info into a debug loc section.
     emitDebugLoc();
@@ -1047,27 +1070,51 @@ void DwarfDebug::endModule() {
 }

 // Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
-                                              DebugLoc ScopeLoc) {
-  return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext()));
-}
-
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
-                                              const MDNode *ScopeNode) {
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
+                                                     DIVariable &Cleansed) {
   LLVMContext &Ctx = DV->getContext();
   // More then one inlined variable corresponds to one abstract variable.
-  DIVariable Var = cleanseInlinedVariable(DV, Ctx);
-  auto I = AbstractVariables.find(Var);
+  // FIXME: This duplication of variables when inlining should probably be
+  // removed. It's done to allow each DIVariable to describe its location
+  // because the DebugLoc on the dbg.value/declare isn't accurate. We should
+  // make it accurate then remove this duplication/cleansing stuff.
+  Cleansed = cleanseInlinedVariable(DV, Ctx);
+  auto I = AbstractVariables.find(Cleansed);
   if (I != AbstractVariables.end())
     return I->second.get();
+  return nullptr;
+}

-  LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode);
-  if (!Scope)
-    return nullptr;
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
+  DIVariable Cleansed;
+  return getExistingAbstractVariable(DV, Cleansed);
+}

-  auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this);
+void DwarfDebug::createAbstractVariable(const DIVariable &Var,
+                                        LexicalScope *Scope) {
+  auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
   addScopeVariable(Scope, AbsDbgVariable.get());
-  return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get();
+  AbstractVariables[Var] = std::move(AbsDbgVariable);
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
+                                                 const MDNode *ScopeNode) {
+  DIVariable Cleansed = DV;
+  if (getExistingAbstractVariable(DV, Cleansed))
+    return;
+
+  createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+}
+
+void
+DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
+                                                    const MDNode *ScopeNode) {
+  DIVariable Cleansed = DV;
+  if (getExistingAbstractVariable(DV, Cleansed))
+    return;
+
+  if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+    createAbstractVariable(Cleansed, Scope);
 }

 // If Var is a current function argument then add it to CurrentFnArguments list.
@@ -1106,11 +1153,11 @@ void DwarfDebug::collectVariableInfoFromMMITable(
     if (!Scope)
       continue;

-    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
-    DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
+    ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+    ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+    DbgVariable *RegVar = ConcreteVariables.back().get();
     RegVar->setFrameIndex(VI.Slot);
-    if (!addCurrentFnArgument(RegVar, Scope))
-      addScopeVariable(Scope, RegVar);
+    addScopeVariable(Scope, RegVar);
   }
 }

@@ -1175,18 +1222,14 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
     Processed.insert(DV);
     const MachineInstr *MInsn = Ranges.front().first;
     assert(MInsn->isDebugValue() && "History must begin with debug value");
-    DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
-    DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
-    if (!addCurrentFnArgument(RegVar, Scope))
-      addScopeVariable(Scope, RegVar);
-    if (AbsVar)
-      AbsVar->setMInsn(MInsn);
+    ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+    ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
+    DbgVariable *RegVar = ConcreteVariables.back().get();
+    addScopeVariable(Scope, RegVar);

     // Check if the first DBG_VALUE is valid for the rest of the function.
-    if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
-      RegVar->setMInsn(MInsn);
+    if (Ranges.size() == 1 && Ranges.front().second == nullptr)
       continue;
-    }

     // Handle multiple DBG_VALUE instructions describing one variable.
     RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());

@@ -1205,6 +1248,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
       if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
           !Begin->getOperand(0).getReg())
         continue;
+      DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
+      if (End != nullptr)
+        DEBUG(dbgs() << "\t" << *End);
+      else
+        DEBUG(dbgs() << "\tNULL\n");

       const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
       assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
@@ -1218,8 +1266,6 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
         EndLabel = getLabelBeforeInsn(std::next(I)->first);
       assert(EndLabel && "Forgot label after instruction ending a range!");

-      DEBUG(dbgs() << "DotDebugLoc Pair:\n"
-                   << "\t" << *Begin << "\t" << *End << "\n");
       DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
       if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
         DebugLoc.push_back(std::move(Loc));
@@ -1233,11 +1279,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
     assert(DV.isVariable());
     if (!Processed.insert(DV))
       continue;
-    if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
-      addScopeVariable(
-          Scope,
-          new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()),
-                          this));
+    if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
+      ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+      ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+      addScopeVariable(Scope, ConcreteVariables.back().get());
+    }
   }
 }

@@ -1371,6 +1417,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo())
     return;

+  auto DI = FunctionDIs.find(MF->getFunction());
+  if (DI == FunctionDIs.end())
+    return;
+
   // Grab the lexical scopes for the function, if we don't have any of those
   // then we're not going to be able to do anything.
   LScopes.initialize(*MF);
@@ -1386,6 +1436,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   // belongs to so that we add to the correct per-cu line table in the
   // non-asm case.
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+  // FnScope->getScopeNode() and DI->second should represent the same function,
+  // though they may not be the same MDNode due to inline functions merged in
+  // LTO where the debug info metadata still differs (either due to distinct
+  // written differences - two versions of a linkonce_odr function
+  // written/copied into two separate files, or some sub-optimal metadata that
+  // isn't structurally identical (see: file path/name info from clang, which
+  // includes the directory of the cpp file being built, even when the file name
+  // is absolute (such as an <> lookup header)))
   DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
   if (Asm->OutStreamer.hasRawTextSupport())
@@ -1440,6 +1498,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 }

 void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+  if (addCurrentFnArgument(Var, LS))
+    return;
   SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
   DIVariable DV = Var->getVariable();
   // Variables with positive arg numbers are parameters.
@@ -1481,7 +1541,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   assert(CurFn == MF);
   assert(CurFn != nullptr);

-  if (!MMI->hasDebugInfo() || LScopes.empty()) {
+  if (!MMI->hasDebugInfo() || LScopes.empty() ||
+      !FunctionDIs.count(MF->getFunction())) {
     // If we don't have a lexical scope for this function then there will
     // be a hole in the range information. Keep note of this by setting the
     // previously used section to nullptr.
@@ -1517,7 +1578,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
       assert(DV && DV.isVariable());
       if (!ProcessedVars.insert(DV))
         continue;
-      findAbstractVariable(DV, DV.getContext());
+      ensureAbstractVariableIsCreated(DV, DV.getContext());
     }
     constructAbstractSubprogramScopeDIE(TheCU, AScope);
   }
@@ -1536,12 +1597,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
   // DbgVariables except those that are also in AbstractVariables (since they
   // can be used cross-function)
-  for (const auto &I : ScopeVariables)
-    for (const auto *Var : I.second)
-      if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable())
-        delete Var;
   ScopeVariables.clear();
-  DeleteContainerPointers(CurrentFnArguments);
+  CurrentFnArguments.clear();
   DbgValues.clear();
   LabelsBeforeInsn.clear();
   LabelsAfterInsn.clear();
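[The cleanup above drops manual deletes in favor of container ownership: concrete DbgVariables now live in a vector of unique_ptr (ConcreteVariables), while per-scope maps hold only raw, non-owning pointers, so clearing the maps can never double-free. A minimal sketch of that pattern with hypothetical types, not the DwarfDebug API:]

#include <map>
#include <memory>
#include <vector>

struct Var { int Id; };

int main() {
  // Owning container: plays the role of DwarfDebug::ConcreteVariables.
  std::vector<std::unique_ptr<Var>> Concrete;
  // Non-owning index: plays the role of ScopeVariables / CurrentFnArguments.
  std::map<int, std::vector<Var *>> ByScope;

  Concrete.push_back(std::make_unique<Var>(Var{0}));
  ByScope[42].push_back(Concrete.back().get());

  // Per-function cleanup only drops raw pointers; the unique_ptrs free each
  // object exactly once when Concrete is destroyed.
  ByScope.clear();
  return 0;
}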
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 2f5abc8..ffe4843 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MachineLocation.h"
@@ -71,16 +72,21 @@ class DbgVariable {
   DIVariable Var;             // Variable Descriptor.
   DIE *TheDIE;                // Variable DIE.
   unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
-  DbgVariable *AbsVar;        // Corresponding Abstract variable, if any.
   const MachineInstr *MInsn;  // DBG_VALUE instruction of the variable.
   int FrameIndex;
   DwarfDebug *DD;

 public:
-  // AbsVar may be NULL.
-  DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
-      : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV),
-        MInsn(nullptr), FrameIndex(~0), DD(DD) {}
+  /// Construct a DbgVariable from a DIVariable.
+  DbgVariable(DIVariable V, DwarfDebug *DD)
+      : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr),
+        FrameIndex(~0), DD(DD) {}
+
+  /// Construct a DbgVariable from a DEBUG_VALUE.
+  /// AbstractVar may be NULL.
+  DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
+      : Var(DbgValue->getDebugVariable()), TheDIE(nullptr),
+        DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {}

   // Accessors.
   DIVariable getVariable() const { return Var; }
@@ -89,9 +95,7 @@ public:
   void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
   unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
   StringRef getName() const { return Var.getName(); }
-  DbgVariable *getAbstractVariable() const { return AbsVar; }
   const MachineInstr *getMInsn() const { return MInsn; }
-  void setMInsn(const MachineInstr *M) { MInsn = M; }
   int getFrameIndex() const { return FrameIndex; }
   void setFrameIndex(int FI) { FrameIndex = FI; }
   // Translate tag to proper Dwarf tag.
@@ -200,6 +204,7 @@ class DwarfDebug : public AsmPrinterHandler {

   // Collection of abstract variables.
   DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+  SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;

   // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
   // can refer to them in spite of insertions into this list.
@@ -325,6 +330,8 @@ class DwarfDebug : public AsmPrinterHandler {
   DwarfAccelTable AccelNamespace;
   DwarfAccelTable AccelTypes;

+  DenseMap<const Function *, DISubprogram> FunctionDIs;
+
   MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);

   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -334,8 +341,14 @@ class DwarfDebug : public AsmPrinterHandler {
   }

   /// \brief Find abstract variable associated with Var.
-  DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
-  DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope);
+  DbgVariable *getExistingAbstractVariable(const DIVariable &DV,
+                                           DIVariable &Cleansed);
+  DbgVariable *getExistingAbstractVariable(const DIVariable &DV);
+  void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope);
+  void ensureAbstractVariableIsCreated(const DIVariable &Var,
+                                       const MDNode *Scope);
+  void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
+                                               const MDNode *Scope);

   /// \brief Find DIE for the given subprogram and attach appropriate
   /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
@@ -389,6 +402,8 @@ class DwarfDebug : public AsmPrinterHandler {
   /// \brief Collect info for variables that were optimized out.
   void collectDeadVariables();

+  void finishVariableDefinitions();
+
   void finishSubprogramDefinitions();

   /// \brief Finish off debug information after all functions have been
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f792482..0440fce 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -14,138 +14,14 @@
 #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H

-#include "AsmPrinterHandler.h"
-#include "llvm/ADT/DenseMap.h"
+#include "EHStreamer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include <vector>

 namespace llvm {
-
-template <typename T> class SmallVectorImpl;
-struct LandingPadInfo;
-class MachineModuleInfo;
-class MachineInstr;
 class MachineFunction;
-class MCAsmInfo;
-class MCExpr;
-class MCSymbol;
-class Function;
 class ARMTargetStreamer;
-class AsmPrinter;
-
-//===----------------------------------------------------------------------===//
-/// DwarfException - Emits Dwarf exception handling directives.
-///
-class DwarfException : public AsmPrinterHandler {
-protected:
-  /// Asm - Target of Dwarf emission.
-  AsmPrinter *Asm;
-
-  /// MMI - Collected machine module information.
-  MachineModuleInfo *MMI;
-
-  /// SharedTypeIds - How many leading type ids two landing pads have in common.
-  static unsigned SharedTypeIds(const LandingPadInfo *L,
-                                const LandingPadInfo *R);
-
-  /// PadRange - Structure holding a try-range and the associated landing pad.
-  struct PadRange {
-    // The index of the landing pad.
-    unsigned PadIndex;
-    // The index of the begin and end labels in the landing pad's label lists.
-    unsigned RangeIndex;
-  };
-
-  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
-
-  /// ActionEntry - Structure describing an entry in the actions table.
-  struct ActionEntry {
-    int ValueForTypeID; // The value to write - may not be equal to the type id.
-    int NextAction;
-    unsigned Previous;
-  };
-
-  /// CallSiteEntry - Structure describing an entry in the call-site table.
-  struct CallSiteEntry {
-    // The 'try-range' is BeginLabel .. EndLabel.
-    MCSymbol *BeginLabel; // zero indicates the start of the function.
-    MCSymbol *EndLabel;   // zero indicates the end of the function.
-
-    // The landing pad starts at PadLabel.
-    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
-    unsigned Action;
-  };
-
-  /// ComputeActionsTable - Compute the actions table and gather the first
-  /// action index for each landing pad site.
-  unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
-                               SmallVectorImpl<ActionEntry> &Actions,
-                               SmallVectorImpl<unsigned> &FirstActions);
-
-  /// CallToNoUnwindFunction - Return `true' if this is a call to a function
-  /// marked `nounwind'. Return `false' otherwise.
-  bool CallToNoUnwindFunction(const MachineInstr *MI);
-
-  /// ComputeCallSiteTable - Compute the call-site table.  The entry for an
-  /// invoke has a try-range containing the call, a non-zero landing pad and an
-  /// appropriate action.  The entry for an ordinary call has a try-range
-  /// containing the call and zero for the landing pad and the action.  Calls
-  /// marked 'nounwind' have no entry and must not be contained in the try-range
-  /// of any entry - they form gaps in the table.  Entries must be ordered by
-  /// try-range address.
-  void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
-                            const RangeMapType &PadMap,
-                            const SmallVectorImpl<const LandingPadInfo *> &LPs,
-                            const SmallVectorImpl<unsigned> &FirstActions);
-
-  /// EmitExceptionTable - Emit landing pads and actions.
-  ///
-  /// The general organization of the table is complex, but the basic concepts
-  /// are easy.  First there is a header which describes the location and
-  /// organization of the three components that follow.
-  ///  1. The landing pad site information describes the range of code covered
-  ///     by the try.  In our case it's an accumulation of the ranges covered
-  ///     by the invokes in the try.  There is also a reference to the landing
-  ///     pad that handles the exception once processed.  Finally an index into
-  ///     the actions table.
-  ///  2. The action table, in our case, is composed of pairs of type ids
-  ///     and next action offset.  Starting with the action index from the
-  ///     landing pad site, each type Id is checked for a match to the current
-  ///     exception.  If it matches then the exception and type id are passed
-  ///     on to the landing pad.  Otherwise the next action is looked up.  This
-  ///     chain is terminated with a next action of zero.  If no type id is
-  ///     found the frame is unwound and handling continues.
-  ///  3. Type id table contains references to all the C++ typeinfo for all
-  ///     catches in the function.  This tables is reversed indexed base 1.
-  void EmitExceptionTable();
-
-  virtual void EmitTypeInfos(unsigned TTypeEncoding);
-
-public:
-  //===--------------------------------------------------------------------===//
-  // Main entry points.
-  //
-  DwarfException(AsmPrinter *A);
-  virtual ~DwarfException();
-
-  /// endModule - Emit all exception information that should come after the
-  /// content.
-  void endModule() override;
-
-  /// beginFunction - Gather pre-function exception information.  Assumes being
-  /// emitted immediately after the function entry point.
-  void beginFunction(const MachineFunction *MF) override;
-
-  /// endFunction - Gather and emit post-function exception information.
-  void endFunction(const MachineFunction *) override;
-
-  // We don't need these.
-  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
-  void beginInstruction(const MachineInstr *MI) override {}
-  void endInstruction() override {}
-};

-class DwarfCFIException : public DwarfException {
+class DwarfCFIException : public EHStreamer {
   /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
   /// should be emitted.
   bool shouldEmitPersonality;
@@ -179,8 +55,8 @@ public:
   void endFunction(const MachineFunction *) override;
 };

-class ARMException : public DwarfException {
-  void EmitTypeInfos(unsigned TTypeEncoding) override;
+class ARMException : public EHStreamer {
+  void emitTypeInfos(unsigned TTypeEncoding) override;
   ARMTargetStreamer &getTargetStreamer();

   /// shouldEmitCFI - Per-function flag to indicate if frame CFI info
@@ -206,7 +82,7 @@ public:
   void endFunction(const MachineFunction *) override;
 };

-class Win64Exception : public DwarfException {
+class Win64Exception : public EHStreamer {
   /// shouldEmitPersonality - Per-function flag to indicate if personality
   /// info should be emitted.
   bool shouldEmitPersonality;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index a70c0f7..9538bee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1071,6 +1071,8 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
        I != E; ++I) {
     DIScope Ctx = *I;
     StringRef Name = Ctx.getName();
+    if (Name.empty() && Ctx.isNameSpace())
+      Name = "(anonymous namespace)";
     if (!Name.empty()) {
       CS += Name;
       CS += "::";
@@ -1359,12 +1361,13 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
     return NDie;
   DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);

-  if (!NS.getName().empty()) {
+  StringRef Name = NS.getName();
+  if (!Name.empty())
     addString(NDie, dwarf::DW_AT_name, NS.getName());
-    DD->addAccelNamespace(NS.getName(), NDie);
-    addGlobalName(NS.getName(), NDie, NS.getContext());
-  } else
-    DD->addAccelNamespace("(anonymous namespace)", NDie);
+  else
+    Name = "(anonymous namespace)";
+  DD->addAccelNamespace(Name, NDie);
+  addGlobalName(Name, NDie, NS.getContext());
   addSourceLine(NDie, NS);
   return &NDie;
 }
@@ -1382,14 +1385,14 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
     // Add subprogram definitions to the CU die directly.
     ContextDIE = &getUnitDie();
-    // Build the decl now to ensure it preceeds the definition.
+    // Build the decl now to ensure it precedes the definition.
     getOrCreateSubprogramDIE(SPDecl);
   }

   // DW_TAG_inlined_subroutine may refer to this DIE.
   DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);

-  // Abort here and fill this in later, depending on whether or not this
+  // Stop here and fill this in later, depending on whether or not this
   // subprogram turns out to have inlined instances or not.
   if (SP.isDefinition())
     return &SPDie;
@@ -1398,12 +1401,21 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   return &SPDie;
 }

+void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) {
+  DISubprogram SPDecl = SP.getFunctionDeclaration();
+  DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+  applySubprogramAttributes(SP, SPDie);
+  addGlobalName(SP.getName(), SPDie, Context);
+}
+
 void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
   DIE *DeclDie = nullptr;
   StringRef DeclLinkageName;
   if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
     DeclDie = getDIE(SPDecl);
-    assert(DeclDie);
+    assert(DeclDie && "This DIE should've already been constructed when the "
+                      "definition DIE was created in "
+                      "getOrCreateSubprogramDIE");
     DeclLinkageName = SPDecl.getLinkageName();
   }

@@ -1502,6 +1514,17 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
     addFlag(SPDie, dwarf::DW_AT_explicit);
 }

+void DwarfUnit::applyVariableAttributes(const DbgVariable &Var,
+                                        DIE &VariableDie) {
+  StringRef Name = Var.getName();
+  if (!Name.empty())
+    addString(VariableDie, dwarf::DW_AT_name, Name);
+  addSourceLine(VariableDie, Var.getVariable());
+  addType(VariableDie, Var.getType());
+  if (Var.isArtificial())
+    addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
 // Return const expression if value is a GEP to access merged global
 // constant. e.g.
 // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1665,10 +1688,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
       DD->addAccelName(GV.getLinkageName(), AddrDIE);
   }

-  if (!GV.isLocalToUnit())
-    addGlobalName(GV.getName(),
-                  VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
-                  GV.getContext());
+  addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
+                GV.getContext());
 }

 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
@@ -1777,24 +1798,13 @@ std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
 std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
                                                          bool Abstract) {
-  StringRef Name = DV.getName();
-
   // Define variable debug information entry.
   auto VariableDie = make_unique<DIE>(DV.getTag());
-  DbgVariable *AbsVar = DV.getAbstractVariable();
-  if (AbsVar && AbsVar->getDIE())
-    addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE());
-  else {
-    if (!Name.empty())
-      addString(*VariableDie, dwarf::DW_AT_name, Name);
-    addSourceLine(*VariableDie, DV.getVariable());
-    addType(*VariableDie, DV.getType());
-    if (DV.isArtificial())
-      addFlag(*VariableDie, dwarf::DW_AT_artificial);
-  }
-
-  if (Abstract)
+  if (Abstract) {
+    applyVariableAttributes(DV, *VariableDie);
     return VariableDie;
+  }

   // Add variable address.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index acb7528..b7b83b2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -400,6 +400,8 @@ public:
   DIE *getOrCreateSubprogramDIE(DISubprogram SP);

   void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
+  void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+  void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);

   /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
   /// given DIType.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 3a12c73..73f62bf 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,45 +7,31 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains support for writing DWARF exception info into asm files.
+// This file contains support for writing exception info into assembly files.
 //
 //===----------------------------------------------------------------------===//

-#include "DwarfException.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
+#include "EHStreamer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+
 using namespace llvm;

-DwarfException::DwarfException(AsmPrinter *A)
-  : Asm(A), MMI(Asm->MMI) {}
+EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}

-DwarfException::~DwarfException() {}
+EHStreamer::~EHStreamer() {}

-/// SharedTypeIds - How many leading type ids two landing pads have in common.
-unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
-                                       const LandingPadInfo *R) {
+/// How many leading type ids two landing pads have in common.
+unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
+                                   const LandingPadInfo *R) {
   const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
   unsigned LSize = LIds.size(), RSize = RIds.size();
   unsigned MinSize = LSize < RSize ? LSize : RSize;
@@ -58,10 +44,10 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
   return Count;
 }

-/// ComputeActionsTable - Compute the actions table and gather the first action
-/// index for each landing pad site.
-unsigned DwarfException::
-ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+/// Compute the actions table and gather the first action index for each landing
+/// pad site.
+unsigned EHStreamer::
+computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
                     SmallVectorImpl<ActionEntry> &Actions,
                     SmallVectorImpl<unsigned> &FirstActions) {

@@ -109,7 +95,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
        I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
     const LandingPadInfo *LPI = *I;
     const std::vector<int> &TypeIds = LPI->TypeIds;
-    unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+    unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
     unsigned SizeSiteActions = 0;

     if (NumShared < TypeIds.size()) {
@@ -167,9 +153,9 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
   return SizeActions;
 }

-/// CallToNoUnwindFunction - Return `true' if this is a call to a function
-/// marked `nounwind'. Return `false' otherwise.
-bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+/// Return `true' if this is a call to a function marked `nounwind'. Return
+/// `false' otherwise.
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
   assert(MI->isCall() && "This should be a call instruction!");

   bool MarkedNoUnwind = false;
@@ -201,15 +187,14 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
   return MarkedNoUnwind;
 }

-/// ComputeCallSiteTable - Compute the call-site table.  The entry for an invoke
-/// has a try-range containing the call, a non-zero landing pad, and an
-/// appropriate action.  The entry for an ordinary call has a try-range
-/// containing the call and zero for the landing pad and the action.  Calls
-/// marked 'nounwind' have no entry and must not be contained in the try-range
-/// of any entry - they form gaps in the table.  Entries must be ordered by
-/// try-range address.
-void DwarfException::
-ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+/// Compute the call-site table.  The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action.  The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action.  Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table.  Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
                      const RangeMapType &PadMap,
                      const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      const SmallVectorImpl<unsigned> &FirstActions) {
@@ -228,7 +213,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
     for (const auto &MI : MBB) {
       if (!MI.isEHLabel()) {
         if (MI.isCall())
-          SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI);
+          SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
         continue;
       }

@@ -308,7 +293,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
   }
 }

-/// EmitExceptionTable - Emit landing pads and actions.
+/// Emit landing pads and actions.
 ///
 /// The general organization of the table is complex, but the basic concepts are
 /// easy.  First there is a header which describes the location and organization
@@ -328,7 +313,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
 ///     unwound and handling continues.
 ///  3. Type ID table contains references to all the C++ typeinfo for all
 ///     catches in the function.  This tables is reverse indexed base 1.
-void DwarfException::EmitExceptionTable() {
+void EHStreamer::emitExceptionTable() {
   const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
   const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -350,7 +335,8 @@ void DwarfException::EmitExceptionTable() {
   // landing pad site.
   SmallVector<ActionEntry, 32> Actions;
   SmallVector<unsigned, 64> FirstActions;
-  unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+  unsigned SizeActions =
+      computeActionsTable(LandingPads, Actions, FirstActions);

   // Invokes and nounwind calls have entries in PadMap (due to being bracketed
   // by try-range labels when lowered).  Ordinary calls do not, so appropriate
@@ -368,7 +354,7 @@ void DwarfException::EmitExceptionTable() {

   // Compute the call-site table.
   SmallVector<CallSiteEntry, 64> CallSites;
-  ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+  computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);

   // Final tallies.

@@ -657,12 +643,12 @@ void DwarfException::EmitExceptionTable() {
     Asm->EmitSLEB128(Action.NextAction);
   }

-  EmitTypeInfos(TTypeEncoding);
+  emitTypeInfos(TTypeEncoding);

   Asm->EmitAlignment(2);
 }

-void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
   const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();

@@ -703,19 +689,18 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
   }
 }

-/// endModule - Emit all exception information that should come after the
-/// content.
-void DwarfException::endModule() {
+/// Emit all exception information that should come after the content.
+void EHStreamer::endModule() {
   llvm_unreachable("Should be implemented");
 }

-/// beginFunction - Gather pre-function exception information.  Assumes it's
-/// being emitted immediately after the function entry point.
-void DwarfException::beginFunction(const MachineFunction *MF) {
+/// Gather pre-function exception information.  Assumes it's being emitted
+/// immediately after the function entry point.
+void EHStreamer::beginFunction(const MachineFunction *MF) {
   llvm_unreachable("Should be implemented");
 }

-/// endFunction - Gather and emit post-function exception information.
-void DwarfException::endFunction(const MachineFunction *) {
+/// Gather and emit post-function exception information.
+void EHStreamer::endFunction(const MachineFunction *) {
   llvm_unreachable("Should be implemented");
 }
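[The action-chain lookup described in the emitExceptionTable comment above can be modeled concretely. A standalone sketch: plain vector indices stand in for the self-relative offsets (and the next-action-of-zero terminator) that the real SLEB128-encoded table uses, so this illustrates the control flow only, not the encoding:]

#include <vector>

// Simplified action-table entry: a type id to test and the index of the next
// entry to try, with -1 ending the chain (the real table uses offsets and 0).
struct SimpleAction { int ValueForTypeID; int NextEntry; };

// Follow a chain from a landing pad's first action until a type id matches.
// Returns the matching type id, or 0 to mean "no match, keep unwinding".
int findMatchingTypeID(const std::vector<SimpleAction> &Actions,
                       int FirstAction, int ThrownTypeID) {
  for (int I = FirstAction; I != -1; I = Actions[I].NextEntry)
    if (Actions[I].ValueForTypeID == ThrownTypeID)
      return ThrownTypeID;
  return 0;
}

int main() {
  // Chain: try type id 1, then type id 2, then stop.
  std::vector<SimpleAction> Actions = {{1, 1}, {2, -1}};
  return findMatchingTypeID(Actions, 0, 2) == 2 ? 0 : 1;
}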
+  MachineModuleInfo *MMI;
+
+  /// How many leading type ids two landing pads have in common.
+  static unsigned sharedTypeIDs(const LandingPadInfo *L,
+                                const LandingPadInfo *R);
+
+  /// Structure holding a try-range and the associated landing pad.
+  struct PadRange {
+    // The index of the landing pad.
+    unsigned PadIndex;
+    // The index of the begin and end labels in the landing pad's label lists.
+    unsigned RangeIndex;
+  };
+
+  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+  /// Structure describing an entry in the actions table.
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    unsigned Previous;
+  };
+
+  /// Structure describing an entry in the call-site table.
+  struct CallSiteEntry {
+    // The 'try-range' is BeginLabel .. EndLabel.
+    MCSymbol *BeginLabel; // zero indicates the start of the function.
+    MCSymbol *EndLabel;   // zero indicates the end of the function.
+
+    // The landing pad starts at PadLabel.
+    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  /// Compute the actions table and gather the first action index for each
+  /// landing pad site.
+  unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+                               SmallVectorImpl<ActionEntry> &Actions,
+                               SmallVectorImpl<unsigned> &FirstActions);
+
+  /// Return `true' if this is a call to a function marked `nounwind'. Return
+  /// `false' otherwise.
+  bool callToNoUnwindFunction(const MachineInstr *MI);
+
+  /// Compute the call-site table. The entry for an invoke has a try-range
+  /// containing the call, a non-zero landing pad and an appropriate action.
+  /// The entry for an ordinary call has a try-range containing the call and
+  /// zero for the landing pad and the action. Calls marked 'nounwind' have
+  /// no entry and must not be contained in the try-range of any entry - they
+  /// form gaps in the table. Entries must be ordered by try-range address.
+
+  void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+                            const RangeMapType &PadMap,
+                            const SmallVectorImpl<const LandingPadInfo *> &LPs,
+                            const SmallVectorImpl<unsigned> &FirstActions);
+
+  /// Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try. In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try. There is also a reference to the landing
+  ///     pad that handles the exception once processed. Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset. Starting with the action index from the
+  ///     landing pad site, each type id is checked for a match to the current
+  ///     exception. If it matches then the exception and type id are passed
+  ///     on to the landing pad. Otherwise the next action is looked up. This
+  ///     chain is terminated with a next action of zero. If no type id is
+  ///     found the frame is unwound and handling continues.
+  ///  3. The type id table contains references to all the C++ typeinfo for all
+  ///     catches in the function. This table is reverse indexed, base 1.
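As an aside to make the action-chain walk described above concrete, here is a minimal standalone C++ sketch. It is an illustration only, not part of the patch: the real encoding stores NextAction as a self-relative byte offset, whereas this model uses a base-1 table index.

    // Simplified model of an action-table record: a type id to test and the
    // base-1 index of the next record in the chain (0 terminates the chain).
    struct ActionRecord {
      int TypeID;
      int NextAction;
    };

    // Walk the chain starting at FirstAction. Returns the matching type id to
    // hand to the landing pad, or 0 to keep unwinding this frame.
    int walkActionChain(const ActionRecord *Table, int FirstAction,
                        int ThrownTypeID) {
      for (int Idx = FirstAction; Idx != 0; /* advanced in the body */) {
        const ActionRecord &A = Table[Idx - 1]; // the table is base-1 indexed
        if (A.TypeID == ThrownTypeID)
          return A.TypeID;                      // match: enter the landing pad
        Idx = A.NextAction;                     // otherwise follow the chain
      }
      return 0;                                 // chain exhausted: unwind further
    }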
+ void emitExceptionTable(); + + virtual void emitTypeInfos(unsigned TTypeEncoding); + +public: + EHStreamer(AsmPrinter *A); + virtual ~EHStreamer(); + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; + + // Unused. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} +}; +} + +#endif + diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 17d8bff..81285d5 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -38,9 +38,8 @@ using namespace llvm; Win64Exception::Win64Exception(AsmPrinter *A) - : DwarfException(A), - shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) - {} + : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false) {} Win64Exception::~Win64Exception() {} @@ -73,14 +72,14 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym); + Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym); if (!shouldEmitPersonality) return; - MCSymbol *GCCHandlerSym = - Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); - Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -99,17 +98,10 @@ void Win64Exception::endFunction(const MachineFunction *) { MMI->TidyLandingPads(); if (shouldEmitPersonality) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.PushSection(); - Asm->OutStreamer.EmitWin64EHHandlerData(); - Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), - 4); - EmitExceptionTable(); + Asm->OutStreamer.EmitWinEHHandlerData(); + emitExceptionTable(); Asm->OutStreamer.PopSection(); } - Asm->OutStreamer.EmitWin64EHEndProc(); + Asm->OutStreamer.EmitWinCFIEndProc(); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 2212941..6a5c431 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -308,7 +308,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { return; const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV) == true); + assert(FnDebugInfo.count(GV)); assert(CurFn == &FnDebugInfo[GV]); if (CurFn->Instrs.empty()) { diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp index d995333..421946d 100644 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -21,17 +21,19 @@ #include "llvm/Support/Debug.h" #include 
"llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-atomic-expand" namespace { class AtomicExpandLoadLinked : public FunctionPass { - const TargetLowering *TLI; + const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) { + : FunctionPass(ID), TM(TM) { initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); } @@ -50,29 +52,16 @@ namespace { char AtomicExpandLoadLinked::ID = 0; char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; - -static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) { - PassInfo *PI = new PassInfo( - "Expand Atomic calls in terms of load-linked & store-conditional", - "atomic-ll-sc", &AtomicExpandLoadLinked::ID, - PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false, - false, PassInfo::TargetMachineCtor_t( - callTargetMachineCtor<AtomicExpandLoadLinked>)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce) -} - +INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", + "Expand Atomic calls in terms of load-linked & store-conditional", + false, false) FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { return new AtomicExpandLoadLinked(TM); } bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TLI) + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) return false; SmallVector<Instruction *, 1> AtomicInsts; @@ -89,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) { bool MadeChange = false; for (Instruction *Inst : AtomicInsts) { - if (!TLI->shouldExpandAtomicInIR(Inst)) + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) continue; if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) @@ -111,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { // Load instructions don't actually need a leading fence, even in the // SequentiallyConsistent case. AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is // an ldrexd (A3.5.3). IRBuilder<> Builder(LI); - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); insertTrailingFence(Builder, LI->getOrdering()); @@ -178,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { // Start the main loop block now that we've taken care of the preliminaries. 
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *Loaded =
+      TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
   Value *NewVal;
   switch (AI->getOperation()) {
@@ -195,7 +186,7 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
     NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
     break;
   case AtomicRMWInst::Nand:
-    NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
+    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
                                "new");
     break;
   case AtomicRMWInst::Or:
@@ -224,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
     llvm_unreachable("Unknown atomic op");
   }
-  Value *StoreSuccess =
-      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+  Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
+      Builder, NewVal, Addr, MemOpOrder);
   Value *TryAgain = Builder.CreateICmpNE(
       StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
@@ -256,19 +247,26 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   //     %loaded = @load.linked(%addr)
   //     %should_store = icmp eq %loaded, %desired
   //     br i1 %should_store, label %cmpxchg.trystore,
-  //                          label %cmpxchg.end/%cmpxchg.barrier
+  //                          label %cmpxchg.failure
   // cmpxchg.trystore:
   //     %stored = @store_conditional(%new, %addr)
-  //     %try_again = icmp i32 ne %stored, 0
-  //     br i1 %try_again, label %loop, label %cmpxchg.end
-  // cmpxchg.barrier:
+  //     %success = icmp eq i32 %stored, 0
+  //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+  // cmpxchg.success:
+  //     fence?
+  //     br label %cmpxchg.end
+  // cmpxchg.failure:
+  //     fence?
+  //     br label %cmpxchg.end
+  // cmpxchg.end:
+  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
   //     [...]
   BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
-  auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
-  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
+  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
+  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
   // This grabs the DebugLoc from CI
@@ -284,37 +282,82 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *Loaded =
+      TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
   Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(),
                                             "should_store");
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  BasicBlock *FailureBB = FailureOrder == Monotonic ?
ExitBB : BarrierBB; Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB); + Builder.CreateCondBr(StoreSuccess, SuccessBB, + CI->isWeak() ? FailureBB : LoopBB); - // Finally, make sure later instructions don't get reordered with a fence if - // necessary. - Builder.SetInsertPoint(BarrierBB); + // Make sure later instructions don't get reordered with a fence if necessary. + Builder.SetInsertPoint(SuccessBB); insertTrailingFence(Builder, SuccessOrder); Builder.CreateBr(ExitBB); - CI->replaceAllUsesWith(Loaded); - CI->eraseFromParent(); + Builder.SetInsertPoint(FailureBB); + insertTrailingFence(Builder, FailureOrder); + Builder.CreateBr(ExitBB); + + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + // Setup the builder so we can create any PHIs we need. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector<ExtractValueInst *, 2> PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. + for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. 
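  // (Illustration, not part of the patch: given a user of the cmpxchg such as
  //     %pair = cmpxchg i32* %addr, i32 %old, i32 %new seq_cst seq_cst
  //     %val = extractvalue { i32, i1 } %pair, 0
  //     %ok = extractvalue { i32, i1 } %pair, 1
  //  the loop above rewires %val to %loaded from the load-linked loop and %ok
  //  to the CFG-derived %success PHI. Only uses that are not such extractvalue
  //  instructions reach the insertvalue reconstruction below.)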
+ Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); return true; } AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return Ord; if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) @@ -327,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return; if (Ord == Acquire || Ord == AcquireRelease) diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 7f31b1a..b2737bf 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: @@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting element from its argument + // plus the cost of inserting them onto the result vector. + + // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index + // 0 of first vector, index 1 of second vector,index 2 of first vector and + // finally index 3 of second vector and insert them at index <0,1,2,3> of + // result vector. + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f623a48..7503e57 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1505,10 +1505,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (MO.isUse()) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); - } else if (!MO.isDead()) - // Don't try to hoist code in the rare case the terminator defines a - // register that is later used. - return MBB->end(); + } else { + if (!MO.isDead()) + // Don't try to hoist code in the rare case the terminator defines a + // register that is later used. + return MBB->end(); + + // If the terminator defines a register, make sure we don't hoist + // the instruction whose def might be clobbered by the terminator. 
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); + } } if (Uses.empty()) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 0b492a9..57c24e8 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -22,11 +22,13 @@ add_llvm_library(LLVMCodeGen GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp + GlobalMerge.cpp IfConversion.cpp InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp JITCodeEmitter.cpp + JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp LexicalScopes.cpp diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 6aa60c6..ccac40c 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -151,19 +151,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy; } char CodeGenPrepare::ID = 0; -static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { - initializeTargetLibraryInfoPass(Registry); - PassInfo *PI = new PassInfo( - "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, - PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false, - PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) -} +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); @@ -1078,8 +1067,11 @@ void ExtAddrMode::print(raw_ostream &OS) const { NeedPlus = true; } - if (BaseOffs) - OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } if (BaseReg) { OS << (NeedPlus ? " + " : "") diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 822636f..d3ffcc7 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -146,8 +146,8 @@ static const SDep *CriticalPathStep(const SUnit *SU) { void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // It's not safe to change register allocation for source operands of - // that have special allocation requirements. Also assume all registers - // used in a call must not be changed (ABI). + // instructions that have special allocation requirements. Also assume all + // registers used in a call must not be changed (ABI). // FIXME: The issue with predicated instruction is more complex. We are being // conservative here because the kill markers cannot be trusted after // if-conversion: @@ -200,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); + // If this reg is tied and live (Classes[Reg] is set to -1), we can't change + // it or any of its sub or super regs. We need to use KeepRegs to mark the + // reg because not all uses of the same reg within an instruction are + // necessarily tagged as tied. + // Example: an x86 "xor %eax, %eax" will have one source operand tied to the + // def register but not the second (see PR20020 for details). + // FIXME: can this check be relaxed to account for undef uses + // of a register? 
In the above 'xor' example, the uses of %eax are undef, so + // earlier instructions could still replace %eax even though the 'xor' + // itself can't be changed. + if (MI->isRegTiedToUseOperand(i) && + Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { + KeepRegs.set(*SubRegs); + } + for (MCSuperRegIterator SuperRegs(Reg, TRI); + SuperRegs.isValid(); ++SuperRegs) { + KeepRegs.set(*SuperRegs); + } + } + if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -236,9 +258,15 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; + + // If we've already marked this reg as unchangeable, carry on. + if (KeepRegs.test(Reg)) continue; + // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; + // FIXME: we should use a SubRegIterator that includes self (as above), so + // we don't have to repeat all this code for the reg itself. DefIndices[Reg] = Count; KillIndices[Reg] = ~0u; assert(((KillIndices[Reg] == ~0u) != @@ -281,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, RegRefs.insert(std::make_pair(Reg, &MO)); + // FIXME: we should use an MCRegAliasIterator that includes self so we don't + // have to repeat all this code for the reg itself. + // It wasn't previously live but now it is, this is a kill. if (KillIndices[Reg] == ~0u) { KillIndices[Reg] = Count; @@ -309,7 +340,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // the two-address instruction also defines NewReg, as may happen with // pre/postincrement loads. In this case, both the use and def operands are in // RegRefs because the def is inserted by PrescanInstruction and not erased -// during ScanInstruction. So checking for an instructions with definitions of +// during ScanInstruction. So checking for an instruction with definitions of // both NewReg and AntiDepReg covers it. bool CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, @@ -325,7 +356,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, if (RefOper->isDef() && RefOper->isEarlyClobber()) return true; - // Handle cases in which this instructions defines NewReg. + // Handle cases in which this instruction defines NewReg. MachineInstr *MI = RefOper->getParent(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &CheckOper = MI->getOperand(i); @@ -343,11 +374,11 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, return true; // Don't allow an instruction using AntiDepReg to be earlyclobbered by - // NewReg + // NewReg. if (CheckOper.isEarlyClobber()) return true; - // Don't allow inline asm to define NewReg at all. Who know what it's + // Don't allow inline asm to define NewReg at all. Who knows what it's // doing with it. if (MI->isInlineAsm()) return true; @@ -494,8 +525,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // as we go to help determine which registers are available. 
   unsigned Broken = 0;
   unsigned Count = InsertPosIndex - 1;
-  for (MachineBasicBlock::iterator I = End, E = Begin;
-       I != E; --Count) {
+  for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
     MachineInstr *MI = --I;
     if (MI->isDebugValue())
       continue;
@@ -526,7 +556,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
       // Don't break anti-dependencies on non-allocatable registers.
       AntiDepReg = 0;
     else if (KeepRegs.test(AntiDepReg))
-      // Don't break anti-dependencies if an use down below requires
+      // Don't break anti-dependencies if a use down below requires
       // this exact register.
       AntiDepReg = 0;
     else {
@@ -564,8 +594,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
       // If MI's defs have a special allocation requirement, don't allow
       // any def registers to be changed. Also assume all registers
      // defined in a call must not be changed (ABI).
-      if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
-          TII->isPredicated(MI))
+      if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI))
        // If this instruction's defs have special allocation requirement, don't
        // break this anti-dependency.
        AntiDepReg = 0;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 1949a48..45e4ff5 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -55,12 +55,12 @@ class TargetRegisterInfo;
     typedef std::multimap<unsigned, MachineOperand *>::const_iterator RegRefIter;
 
-    /// KillIndices - The index of the most recent kill (proceding bottom-up),
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
     /// or ~0u if the register is not live.
     std::vector<unsigned> KillIndices;
 
-    /// DefIndices - The index of the most recent complete def (proceding bottom
-    /// up), or ~0u if the register is live.
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
     std::vector<unsigned> DefIndices;
 
     /// KeepRegs - A set of registers which are live and cannot be changed to
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
new file mode 100644
index 0000000..027ee38
--- /dev/null
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -0,0 +1,361 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. All of the merged
+// globals can then be addressed using offsets from the same base pointer (no
+// separate base pointer is needed for each global). Such a transformation can
+// significantly reduce the register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//   foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of all three arrays must be kept in registers, so this
+// code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+//   int foo[N];
+//   int bar[N];
+//   int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+//   merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved two registers here almost "for free".
+// ===---------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "global-merge"
+
+cl::opt<bool>
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
+                  cl::desc("Enable global merge pass"),
+                  cl::init(true));
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+                         cl::desc("Enable global merge pass on constants"),
+                         cl::init(false));
+
+// FIXME: this could be a transitional option, and we should probably remove
+// it once we are sure this optimization always benefits all targets.
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+                            cl::desc("Enable global merge pass on external linkage"),
+                            cl::init(false));
+
+STATISTIC(NumMerged, "Number of globals merged");
+namespace {
+  class GlobalMerge : public FunctionPass {
+    const TargetMachine *TM;
+
+    bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                 Module &M, bool isConst, unsigned AddrSpace) const;
+
+    /// \brief Check if the given variable has been identified as must-keep.
+    /// \pre setMustKeepGlobalVariables must have been called on the Module
+    /// that contains GV.
+    bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+      return MustKeepGlobalVariables.count(GV);
+    }
+
+    /// Collect every variable marked as "used" or used in a landing pad
+    /// instruction for this Module.
+    void setMustKeepGlobalVariables(Module &M);
+
+    /// Collect every variable marked as "used".
+    void collectUsedGlobalVariables(Module &M);
+
+    /// Keep track of the GlobalVariables that must not be merged away.
+    SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+  public:
+    static char ID;             // Pass identification, replacement for typeid.
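A minimal, hypothetical driver sketch for orientation (not part of the patch): the pass is created via createGlobalMergePass, defined at the end of this file, and needs the TargetMachine because doMerge consults TargetLowering::getMaximalGlobalOffset().

    #include "llvm/PassManager.h"           // legacy PassManager of this era
    #include "llvm/Target/TargetMachine.h"

    namespace llvm {
    Pass *createGlobalMergePass(const TargetMachine *TM); // defined in this file
    }

    // Hypothetical helper: schedule GlobalMerge ahead of the codegen pipeline.
    // TM must outlive the pass, since doMerge reads its TargetLowering.
    static void scheduleGlobalMerge(llvm::PassManager &PM,
                                    const llvm::TargetMachine *TM) {
      PM.add(llvm::createGlobalMergePass(TM));
    }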
+    explicit GlobalMerge(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM) {
+      initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+    }
+
+    bool doInitialization(Module &M) override;
+    bool runOnFunction(Function &F) override;
+    bool doFinalization(Module &M) override;
+
+    const char *getPassName() const override {
+      return "Merge internal globals";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+  };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
+                   false, false)
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                          Module &M, bool isConst, unsigned AddrSpace) const {
+  const TargetLowering *TLI = TM->getTargetLowering();
+  const DataLayout *DL = TLI->getDataLayout();
+
+  // FIXME: Infer the maximum possible offset depending on the actual users
+  // (these max offsets are different for the users inside Thumb or ARM
+  // functions).
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+  // FIXME: Find better heuristics.
+  std::stable_sort(Globals.begin(), Globals.end(),
+                   [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+    Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+    Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+    return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
+  });
+
+  Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+  assert(Globals.size() > 1);
+
+  // FIXME: This simple solution merges as many globals together as possible.
+  // However, with this solution it would be hard to remove dead global
+  // symbols at link time. An alternative would be to check global symbol
+  // references function by function, merge only the symbols referenced
+  // within the same function, and introduce a heuristic to resolve merge
+  // conflicts between different functions.
+  for (size_t i = 0, e = Globals.size(); i != e; ) {
+    size_t j = 0;
+    uint64_t MergedSize = 0;
+    std::vector<Type*> Tys;
+    std::vector<Constant*> Inits;
+
+    bool HasExternal = false;
+    GlobalVariable *TheFirstExternal = 0;
+    for (j = i; j != e; ++j) {
+      Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += DL->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
+      Tys.push_back(Ty);
+      Inits.push_back(Globals[j]->getInitializer());
+
+      if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+        HasExternal = true;
+        TheFirstExternal = Globals[j];
+      }
+    }
+
+    // If the merged variables don't have external linkage, we needn't expose
+    // the symbol after merging.
+    GlobalValue::LinkageTypes Linkage = HasExternal
+                                            ? GlobalValue::ExternalLinkage
+                                            : GlobalValue::InternalLinkage;
+
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+    // If the merged variables have external linkage, we use the symbol name
+    // of the first merged variable as the suffix of the global symbol name.
+    // This avoids link-time naming conflicts for global symbols.
+    GlobalVariable *MergedGV = new GlobalVariable(
+        M, MergedTy, isConst, Linkage, MergedInit,
+        HasExternal ?
"_MergedGlobals_" + TheFirstExternal->getName() + : "_MergedGlobals", + nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + + for (size_t k = i; k < j; ++k) { + GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + + Constant *Idx[2] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, k-i) + }; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); + Globals[k]->replaceAllUsesWith(GEP); + Globals[k]->eraseFromParent(); + + if (Linkage != GlobalValue::InternalLinkage) { + // Generate a new alias... + auto *PTy = cast<PointerType>(GEP->getType()); + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + Linkage, Name, GEP, &M); + } + + NumMerged++; + } + i = j; + } + + return true; +} + +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the invoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} + +bool GlobalMerge::doInitialization(Module &M) { + if (!EnableGlobalMerge) + return false; + + DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, + BSSGlobals; + const TargetLowering *TLI = TM->getTargetLowering(); + const DataLayout *DL = TLI->getDataLayout(); + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + bool Changed = false; + setMustKeepGlobalVariables(M); + + // Grab all non-const globals. + for (Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + // Merge is safe for "normal" internal or external globals only + if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + continue; + + if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && + !I->hasInternalLinkage()) + continue; + + PointerType *PT = dyn_cast<PointerType>(I->getType()); + assert(PT && "Global variable is not a pointer!"); + + unsigned AddressSpace = PT->getAddressSpace(); + + // Ignore fancy-aligned globals for now. + unsigned Alignment = DL->getPreferredAlignment(I); + Type *Ty = I->getType()->getElementType(); + if (Alignment > DL->getABITypeAlignment(Ty)) + continue; + + // Ignore all 'special' globals. 
+ if (I->getName().startswith("llvm.") || + I->getName().startswith(".llvm.")) + continue; + + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(I)) + continue; + + if (DL->getTypeAllocSize(Ty) < MaxOffset) { + if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) + BSSGlobals[AddressSpace].push_back(I); + else if (I->isConstant()) + ConstGlobals[AddressSpace].push_back(I); + else + Globals[AddressSpace].push_back(I); + } + } + + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = Globals.begin(), E = Globals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); + + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); + + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); + + return Changed; +} + +bool GlobalMerge::runOnFunction(Function &F) { + return false; +} + +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + +Pass *llvm::createGlobalMergePass(const TargetMachine *TM) { + return new GlobalMerge(TM); +} diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp new file mode 100644 index 0000000..61ef722 --- /dev/null +++ b/lib/CodeGen/JumpInstrTables.cpp @@ -0,0 +1,301 @@ +//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief An implementation of jump-instruction tables. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jt" + +#include "llvm/CodeGen/JumpInstrTables.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <vector> + +using namespace llvm; + +char JumpInstrTables::ID = 0; + +INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", + "Jump-Instruction Tables", true, true) +INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); +INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", + "Jump-Instruction Tables", true, true) + +STATISTIC(NumJumpTables, "Number of indirect call tables generated"); +STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); + +ModulePass *llvm::createJumpInstrTablesPass() { + // The default implementation uses a single table for all functions. 
+ return new JumpInstrTables(JumpTable::Single); +} + +ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { + return new JumpInstrTables(JTT); +} + +namespace { +static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; +static const char jump_section_prefix[] = ".jump.instr.table.text."; + +// Checks to see if a given CallSite is making an indirect call, including +// cases where the indirect call is made through a bitcast. +bool isIndirectCall(CallSite &CS) { + if (CS.getCalledFunction()) + return false; + + // Check the value to see if it is merely a bitcast of a function. In + // this case, it will translate to a direct function call in the resulting + // assembly, so we won't treat it as an indirect call here. + const Value *V = CS.getCalledValue(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + return !(CE->isCast() && isa<Function>(CE->getOperand(0))); + } + + // Otherwise, since we know it's a call, it must be an indirect call + return true; +} + +// Replaces Functions and GlobalAliases with a different Value. +bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { + User *Us = U->getUser(); + if (!Us) + return false; + if (Instruction *I = dyn_cast<Instruction>(Us)) { + CallSite CS(I); + + // Don't do the replacement if this use is a direct call to this function. + // If the use is not the called value, then replace it. + if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { + return false; + } + + U->set(V); + } else if (Constant *C = dyn_cast<Constant>(Us)) { + // Don't replace calls to bitcasts of function symbols, since they get + // translated to direct calls. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { + if (CE->getOpcode() == Instruction::BitCast) { + // This bitcast must have exactly one user. + if (CE->user_begin() != CE->user_end()) { + User *ParentUs = *CE->user_begin(); + if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) { + CallSite CS(CI); + Use &CEU = *CE->use_begin(); + if (CS.isCallee(&CEU)) { + return false; + } + } + } + } + } + + // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier + // requires alias to point to a defined function. So, GlobalAlias is handled + // as a separate case in runOnModule. + if (!isa<GlobalAlias>(C)) + C->replaceUsesOfWithOnConstant(GV, V, U); + } else { + assert(false && "The Use of a Function symbol is neither an instruction nor" + " a constant"); + } + + return true; +} + +// Replaces all replaceable address-taken uses of GV with a pointer to a +// jump-instruction table entry. +void replaceValueWithFunction(GlobalValue *GV, Function *F) { + // Go through all uses of this function and replace the uses of GV with the + // jump-table version of the function. Get the uses as a vector before + // replacing them, since replacing them changes the use list and invalidates + // the iterator otherwise. + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) { + Use &U = *I++; + + // Replacement of constants replaces all instances in the constant. So, some + // uses might have already been handled by the time we reach them here. 
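  // (Illustration, not part of the patch: for a jumptable function @f, a
  //  direct call "call void @f()" keeps its real callee, while an
  //  address-taken use such as "store void ()* @f, void ()** %slot" is
  //  rewritten to point at the __llvm_jump_instr_table_N_M stub created by
  //  insertEntry, so only the table entry's address escapes.)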
+ if (U.get() == GV) + replaceGlobalValueIndirectUse(GV, F, &U); + } + + return; +} +} // end anonymous namespace + +JumpInstrTables::JumpInstrTables() + : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), + JTType(JumpTable::Single) { + initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); +} + +JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT) + : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) { + initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); +} + +JumpInstrTables::~JumpInstrTables() {} + +void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<JumpInstrTableInfo>(); +} + +Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { + FunctionType *OrigFunTy = Target->getFunctionType(); + FunctionType *FunTy = transformType(OrigFunTy); + + JumpMap::iterator it = Metadata.find(FunTy); + if (Metadata.end() == it) { + struct TableMeta Meta; + Meta.TableNum = TableCount; + Meta.Count = 0; + Metadata[FunTy] = Meta; + it = Metadata.find(FunTy); + ++NumJumpTables; + ++TableCount; + } + + it->second.Count++; + + std::string NewName(jump_func_prefix); + NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str(); + Function *JumpFun = + Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M); + // The section for this table + JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str()); + JITI->insertEntry(FunTy, Target, JumpFun); + + ++NumFuncsInJumpTables; + return JumpFun; +} + +bool JumpInstrTables::hasTable(FunctionType *FunTy) { + FunctionType *TransTy = transformType(FunTy); + return Metadata.end() != Metadata.find(TransTy); +} + +FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { + // Returning nullptr forces all types into the same table, since all types map + // to the same type + Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); + + // Ignore the return type. + Type *RetTy = VoidPtrTy; + bool IsVarArg = FunTy->isVarArg(); + std::vector<Type *> ParamTys(FunTy->getNumParams()); + FunctionType::param_iterator PI, PE; + int i = 0; + + std::vector<Type *> EmptyParams; + Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); + FunctionType *VoidFnTy = FunctionType::get( + Type::getVoidTy(FunTy->getContext()), EmptyParams, false); + switch (JTType) { + case JumpTable::Single: + + return FunctionType::get(RetTy, EmptyParams, false); + case JumpTable::Arity: + // Transform all types to void* so that all functions with the same arity + // end up in the same table. + for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; + PI++, i++) { + ParamTys[i] = VoidPtrTy; + } + + return FunctionType::get(RetTy, ParamTys, IsVarArg); + case JumpTable::Simplified: + // Project all parameters types to one of 3 types: composite, integer, and + // function, matching the three subclasses of Type. + for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; + ++PI, ++i) { + assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) || + isa<CompositeType>(*PI)) && + "This type is not an Integer or a Composite or a Function"); + if (isa<CompositeType>(*PI)) { + ParamTys[i] = VoidPtrTy; + } else if (isa<FunctionType>(*PI)) { + ParamTys[i] = VoidFnTy; + } else if (isa<IntegerType>(*PI)) { + ParamTys[i] = Int32Ty; + } + } + + return FunctionType::get(RetTy, ParamTys, IsVarArg); + case JumpTable::Full: + // Don't transform this type at all. 
+ return FunTy; + } + + return nullptr; +} + +bool JumpInstrTables::runOnModule(Module &M) { + // Make sure the module is well-formed, especially with respect to jumptable. + if (verifyModule(M)) + return false; + + JITI = &getAnalysis<JumpInstrTableInfo>(); + + // Get the set of jumptable-annotated functions. + DenseMap<Function *, Function *> Functions; + for (Function &F : M) { + if (F.hasFnAttribute(Attribute::JumpTable)) { + assert(F.hasUnnamedAddr() && + "Attribute 'jumptable' requires 'unnamed_addr'"); + Functions[&F] = nullptr; + } + } + + // Create the jump-table functions. + for (auto &KV : Functions) { + Function *F = KV.first; + KV.second = insertEntry(M, F); + } + + // GlobalAlias is a special case, because the target of an alias statement + // must be a defined function. So, instead of replacing a given function in + // the alias, we replace all uses of aliases that target jumptable functions. + // Note that there's no need to create these functions, since only aliases + // that target known jumptable functions are replaced, and there's no way to + // put the jumptable annotation on a global alias. + DenseMap<GlobalAlias *, Function *> Aliases; + for (GlobalAlias &GA : M.aliases()) { + Constant *Aliasee = GA.getAliasee(); + if (Function *F = dyn_cast<Function>(Aliasee)) { + auto it = Functions.find(F); + if (it != Functions.end()) { + Aliases[&GA] = it->second; + } + } + } + + // Replace each address taken function with its jump-instruction table entry. + for (auto &KV : Functions) + replaceValueWithFunction(KV.first, KV.second); + + for (auto &KV : Aliases) + replaceValueWithFunction(KV.first, KV.second); + + return !Functions.empty(); +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index a5ac057..df96b94 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,11 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" + +#include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -82,6 +86,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + // Add internal analysis passes from the target machine. TM->addAnalysisPasses(PM); @@ -136,6 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + // Passes to handle jumptable function annotations. These can't be handled at + // JIT time, so we don't add them directly to addPassesToGenerateCode. + PM.add(createJumpInstrTableInfoPass()); + PM.add(createJumpInstrTablesPass(Options.JTType)); + // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -199,7 +209,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. 
- AsmStreamer.reset(createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(*Context)); break; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 388f58f..7d5646b 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -329,12 +329,13 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), - ModifiedMF(false) {} + LDVImpl(LiveDebugVariables *ps) + : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); /// clear - Release all memory. void clear() { + MF = nullptr; userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); @@ -693,11 +694,11 @@ void LDVImpl::computeIntervals() { } bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + clear(); MF = &mf; LIS = &pass.getAnalysis<LiveIntervals>(); MDT = &pass.getAnalysis<MachineDominatorTree>(); TRI = mf.getTarget().getRegisterInfo(); - clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -712,6 +713,8 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; + if (!FunctionDIs.count(mf.getFunction())) + return false; if (!pImpl) pImpl = new LDVImpl(this); return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); @@ -974,6 +977,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, void LDVImpl::emitDebugValues(VirtRegMap *VRM) { DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + if (!MF) + return; const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); @@ -988,6 +993,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); } +bool LiveDebugVariables::doInitialization(Module &M) { + FunctionDIs = makeSubprogramMap(M); + return Pass::doInitialization(M); +} #ifndef NDEBUG void LiveDebugVariables::dump() { diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index bb67435..7ec0d17 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -22,6 +22,7 @@ #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -32,6 +33,7 @@ class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; + DenseMap<const Function*, DISubprogram> FunctionDIs; public: static char ID; // Pass identification, replacement for typeid @@ -64,6 +66,7 @@ private: bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; + bool doInitialization(Module &) override; }; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3563f8e..1559560 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -186,6 +186,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); + computeDeadValues(&LI, LI, nullptr, nullptr); } void LiveIntervals::computeVirtRegs() { @@ -412,21 
+413,34 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Handle dead values. bool CanSeparate = false; + computeDeadValues(li, NewLR, &CanSeparate, dead); + + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; +} + +void LiveIntervals::computeDeadValues(LiveInterval *li, + LiveRange &LR, + bool *CanSeparate, + SmallVectorImpl<MachineInstr*> *dead) { for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); - assert(LRI != NewLR.end() && "Missing segment for PHI"); + LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def); + assert(LRI != LR.end() && "Missing segment for PHI"); if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - NewLR.removeSegment(LRI->start, LRI->end); + LR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); - CanSeparate = true; + if (CanSeparate) + *CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); @@ -438,11 +452,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } } - - // Move the trimmed segments back. - li->segments.swap(NewLR.segments); - DEBUG(dbgs() << "Shrunk: " << *li << '\n'); - return CanSeparate; } void LiveIntervals::extendToIndices(LiveRange &LR, diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 0ec5c33..08fef5f 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -332,7 +332,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } } -void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) { +void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { OS << "BB#" << getNumber(); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index eb3d71f..6138aef 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -457,7 +457,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. 
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); @@ -530,10 +530,9 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -548,10 +547,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -571,16 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*Alloca*/ nullptr)); return -++NumFixedObjects; } +/// CreateFixedSpillStackObject - Create a spill slot at a fixed location +/// on the stack. Returns an index with a negative value. +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset) { + unsigned StackAlign = getFrameLowering()->getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, + /*Immutable*/ true, + /*isSS*/ true, + /*Alloca*/ nullptr)); + return -++NumFixedObjects; +} BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { @@ -849,11 +861,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) return false; - + // For now, only support constants with the same size. 
uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || - StoreSize > 128) + if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) return false; Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); @@ -882,7 +893,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. /// -unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 23847d6..44191f7 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -333,6 +333,12 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (skipOptnoneFunction(*mf.getFunction())) return false; + const TargetSubtargetInfo &ST = + mf.getTarget().getSubtarget<TargetSubtargetInfo>(); + if (!ST.enablePostMachineScheduler()) { + DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); + return false; + } DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); // Initialize the context of the pass. @@ -472,14 +478,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { // unimplemented } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ReadyQueue::dump() { dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; } -#endif //===----------------------------------------------------------------------===// // ScheduleDAGMI - Basic machine instruction scheduling. This is @@ -529,6 +534,11 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { llvm_unreachable(nullptr); } #endif + // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) + SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); + --SuccSU->NumPredsLeft; if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) SchedImpl->releaseTopNode(SuccSU); @@ -563,6 +573,11 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { llvm_unreachable(nullptr); } #endif + // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) + PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); + --PredSU->NumSuccsLeft; if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) SchedImpl->releaseBottomNode(PredSU); @@ -674,10 +689,13 @@ void ScheduleDAGMI::schedule() { CurrentBottom = MI; } } - updateQueues(SU, IsTopNode); - - // Notify the scheduling strategy after updating the DAG. + // Notify the scheduling strategy before updating the DAG. + // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues + // runs, it can then use the accurate ReadyCycle time to determine whether + // newly released nodes can move to the readyQ. 
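The releaseSucc/releasePred hunks above move latency propagation to the moment an edge is released: the dependent node's ready cycle is raised to at least the just-scheduled node's ready cycle plus the edge latency, which is also why schedNode must now run before updateQueues. A simplified standalone model of the top-down direction (plain structs, not the LLVM scheduler classes):

#include <algorithm>
#include <cassert>

// Minimal stand-ins for the scheduler's data: a node knows its earliest
// ready cycle and how many unscheduled predecessors remain.
struct SUnit {
  unsigned TopReadyCycle;
  unsigned NumPredsLeft;
};

// Called once per edge when its predecessor is scheduled: raise the
// successor's ready cycle to pred-ready + latency, then release the edge.
// Returns true when the successor has no predecessors left and is ready.
bool releaseSucc(const SUnit &Pred, SUnit &Succ, unsigned Latency) {
  Succ.TopReadyCycle = std::max(Succ.TopReadyCycle, Pred.TopReadyCycle + Latency);
  assert(Succ.NumPredsLeft > 0 && "edge released twice");
  return --Succ.NumPredsLeft == 0;
}

int main() {
  SUnit A = {3, 0};  // scheduled; became ready at cycle 3
  SUnit C = {5, 0};  // scheduled; became ready at cycle 5
  SUnit B = {0, 2};  // depends on A and C
  releaseSucc(A, B, 2);               // B can start no earlier than cycle 5
  bool Ready = releaseSucc(C, B, 1);  // raised to cycle 6; B is released
  assert(Ready && B.TopReadyCycle == 6);
  return 0;
}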
SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -1568,7 +1586,7 @@ void SchedBoundary::reset() { // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is // reserved (SchedBoundary::ReservedCycles). - MaxObservedLatency = 0; + MaxObservedStall = 0; #endif // Reserve a zero-count for invalid CritResIdx. ExecutedResCounts.resize(1); @@ -1668,8 +1686,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) { for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle) + unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + if (NRCycle > CurrCycle) { +#ifndef NDEBUG + MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); +#endif + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(PI->ProcResourceIdx) + << "=" << NRCycle << "c\n"); return true; + } } } return false; @@ -1725,6 +1751,16 @@ getOtherResourceCount(unsigned &OtherCritIdx) { } void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + +#ifndef NDEBUG + // ReadyCycle was bumped up to the CurrCycle when this node was + // scheduled, but CurrCycle may have been eagerly advanced immediately after + // scheduling, so it may now be greater than ReadyCycle. + if (ReadyCycle > CurrCycle) + MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); +#endif + if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -1744,18 +1780,6 @@ void SchedBoundary::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - MaxObservedLatency = std::max(Latency, MaxObservedLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; - } releaseNode(SU, SU->TopReadyCycle); } @@ -1763,20 +1787,6 @@ void SchedBoundary::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - MaxObservedLatency = std::max(Latency, MaxObservedLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; - } releaseNode(SU, SU->BotReadyCycle); } @@ -1943,10 +1953,12 @@ void SchedBoundary::bumpNode(SUnit *SU) { PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { - ReservedCycles[PIdx] = isTop() ?
NextCycle + PI->Cycles : NextCycle; -#ifndef NDEBUG - MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency); -#endif + if (isTop()) { + ReservedCycles[PIdx] = + std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); + } + else + ReservedCycles[PIdx] = NextCycle; } } } @@ -2049,8 +2061,10 @@ SUnit *SchedBoundary::pickOnlyChoice() { } } for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && - "permanent hazard"); (void)i; +// FIXME: Re-enable assert once PR20057 is resolved. +// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && +// "permanent hazard"); + (void)i; bumpCycle(CurrCycle + 1); releasePending(); } @@ -2090,111 +2104,6 @@ void SchedBoundary::dumpScheduledState() { // GenericScheduler - Generic implementation of MachineSchedStrategy. //===----------------------------------------------------------------------===// -namespace { -/// Base class for GenericScheduler. This class maintains information about -/// scheduling candidates based on TargetSchedModel making it easy to implement -/// heuristics for either preRA or postRA scheduling. -class GenericSchedulerBase : public MachineSchedStrategy { -public: - /// Represent the type of SchedCandidate found within a single queue. - /// pickNodeBidirectional depends on these listed by decreasing priority. - enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, - ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; - -#ifndef NDEBUG - static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); -#endif - - /// Policy for scheduling the next instruction in the candidate's zone. - struct CandPolicy { - bool ReduceLatency; - unsigned ReduceResIdx; - unsigned DemandResIdx; - - CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} - }; - - /// Status of an instruction's critical resource consumption. - struct SchedResourceDelta { - // Count critical resources in the scheduled region required by SU. - unsigned CritResources; - - // Count critical resources from another region consumed by SU. - unsigned DemandedResources; - - SchedResourceDelta(): CritResources(0), DemandedResources(0) {} - - bool operator==(const SchedResourceDelta &RHS) const { - return CritResources == RHS.CritResources - && DemandedResources == RHS.DemandedResources; - } - bool operator!=(const SchedResourceDelta &RHS) const { - return !operator==(RHS); - } - }; - - /// Store the state used by GenericScheduler heuristics, required for the - /// lifetime of one invocation of pickNode(). - struct SchedCandidate { - CandPolicy Policy; - - // The best SUnit candidate. - SUnit *SU; - - // The reason for this candidate. - CandReason Reason; - - // Set of reasons that apply to multiple candidates. - uint32_t RepeatReasonSet; - - // Register pressure values for the best candidate. - RegPressureDelta RPDelta; - - // Critical resource consumption of the best candidate. - SchedResourceDelta ResDelta; - - SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {} - - bool isValid() const { return SU; } - - // Copy the status of another candidate without changing policy. 
- void setBest(SchedCandidate &Best) { - assert(Best.Reason != NoCand && "uninitialized Sched candidate"); - SU = Best.SU; - Reason = Best.Reason; - RPDelta = Best.RPDelta; - ResDelta = Best.ResDelta; - } - - bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } - void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } - - void initResourceDelta(const ScheduleDAGMI *DAG, - const TargetSchedModel *SchedModel); - }; - -protected: - const MachineSchedContext *Context; - const TargetSchedModel *SchedModel; - const TargetRegisterInfo *TRI; - - SchedRemainder Rem; -protected: - GenericSchedulerBase(const MachineSchedContext *C): - Context(C), SchedModel(nullptr), TRI(nullptr) {} - - void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, - SchedBoundary *OtherZone); - -#ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); -#endif -}; -} // namespace - void GenericSchedulerBase::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { @@ -2430,65 +2339,6 @@ static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); } -namespace { -/// GenericScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class GenericScheduler : public GenericSchedulerBase { - ScheduleDAGMILive *DAG; - - // State of the top and bottom scheduled instruction boundaries. - SchedBoundary Top; - SchedBoundary Bot; - - MachineSchedPolicy RegionPolicy; -public: - GenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"), - Bot(SchedBoundary::BotQID, "BotQ") {} - - void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) override; - - bool shouldTrackPressure() const override { - return RegionPolicy.ShouldTrackPressure; - } - - void initialize(ScheduleDAGMI *dag) override; - - SUnit *pickNode(bool &IsTopNode) override; - - void schedNode(SUnit *SU, bool IsTopNode) override; - - void releaseTopNode(SUnit *SU) override { - Top.releaseTopNode(SU); - } - - void releaseBottomNode(SUnit *SU) override { - Bot.releaseBottomNode(SU); - } - - void registerRoots() override; - -protected: - void checkAcyclicLatency(); - - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker); - - SUnit *pickNodeBidirectional(bool &IsTopNode); - - void pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); - - void reschedulePhysRegCopies(SUnit *SU, bool isTop); -}; -} // namespace - void GenericScheduler::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "(PreRA)GenericScheduler needs vreg liveness"); @@ -3023,75 +2873,25 @@ GenericSchedRegistry("converge", "Standard converging scheduler.", // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. //===----------------------------------------------------------------------===// -namespace { -/// PostGenericScheduler - Interface to the scheduling algorithm used by -/// ScheduleDAGMI. -/// -/// Callbacks from ScheduleDAGMI: -/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 
-class PostGenericScheduler : public GenericSchedulerBase { - ScheduleDAGMI *DAG; - SchedBoundary Top; - SmallVector<SUnit*, 8> BotRoots; -public: - PostGenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} - - virtual ~PostGenericScheduler() {} - - void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) override { - /* no configurable policy */ - }; - - /// PostRA scheduling does not track pressure. - bool shouldTrackPressure() const override { return false; } - - void initialize(ScheduleDAGMI *Dag) override { - DAG = Dag; - SchedModel = DAG->getSchedModel(); - TRI = DAG->TRI; - - Rem.init(DAG, SchedModel); - Top.init(DAG, SchedModel, &Rem); - BotRoots.clear(); - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, - // or are disabled, then these HazardRecs will be disabled. - const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); - if (!Top.HazardRec) { - Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } - } - - void registerRoots() override; - - SUnit *pickNode(bool &IsTopNode) override; - - void scheduleTree(unsigned SubtreeID) override { - llvm_unreachable("PostRA scheduler does not support subtree analysis."); - } - - void schedNode(SUnit *SU, bool IsTopNode) override; +void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { + DAG = Dag; + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; - void releaseTopNode(SUnit *SU) override { - Top.releaseTopNode(SU); - } + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + BotRoots.clear(); - // Only called for roots. - void releaseBottomNode(SUnit *SU) override { - BotRoots.push_back(SU); + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, + // or are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); } +} -protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); - - void pickNodeFromQueue(SchedCandidate &Cand); -}; -} // namespace void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index b3f7198..249b2d0 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -30,11 +30,6 @@ using namespace llvm; -namespace llvm { -extern cl::opt<bool> EnableStackMapLiveness; -extern cl::opt<bool> EnablePatchPointLiveness; -} - static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -92,9 +87,9 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with -// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't -// be part of the standard pass pipeline, and the target would just add a PostRA -// scheduling pass wherever it wants. +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it +// wouldn't be part of the standard pass pipeline, and the target would just add +// a PostRA scheduling pass wherever it wants. 
static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); @@ -421,7 +416,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::Win64: + case ExceptionHandling::WinEH: addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: @@ -566,8 +561,7 @@ void TargetPassConfig::addMachinePasses() { if (addPreEmitPass()) printAndVerify("After PreEmit passes"); - if (EnableStackMapLiveness || EnablePatchPointLiveness) - addPass(&StackMapLivenessID); + addPass(&StackMapLivenessID); } /// Add passes that optimize machine instructions in SSA form. diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index eeee93a..716cb1f 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -91,6 +91,10 @@ static cl::opt<bool> DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); +static cl::opt<bool> +DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true), + cl::desc("Disable advanced copy optimization")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -137,6 +141,105 @@ namespace { bool isLoadFoldable(MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); }; + + /// \brief Helper class to track the possible sources of a value defined by + /// a (chain of) copy-related instructions. + /// Given a definition (instruction and definition index), this class + /// follows the use-def chain to find successive suitable sources. + /// The given source can be used to rewrite the definition into + /// def = COPY src. + /// + /// For instance, let us consider the following snippet: + /// v0 = + /// v2 = INSERT_SUBREG v1, v0, sub0 + /// def = COPY v2.sub0 + /// + /// Using a ValueTracker for def = COPY v2.sub0 will give the following + /// suitable sources: + /// v2.sub0 and v0. + /// Then, def can be rewritten into def = COPY v0. + class ValueTracker { + private: + /// The current point into the use-def chain. + const MachineInstr *Def; + /// The index of the definition in Def. + unsigned DefIdx; + /// The sub register index of the definition. + unsigned DefSubReg; + /// The register where the value can be found. + unsigned Reg; + /// Specify whether or not the value tracking looks through + /// complex instructions. When this is false, the value tracker + /// bails on everything that is not a copy or a bitcast. + /// + /// Note: This could have been implemented as a specialized version of + /// the ValueTracker class but that would have complicated the code of + /// the users of this class. + bool UseAdvancedTracking; + /// Optional MachineRegisterInfo used to perform some complex + /// tracking. + const MachineRegisterInfo *MRI; + + /// \brief Dispatcher to the right underlying implementation of + /// getNextSource. + bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for Copy instructions. + bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for Bitcast instructions. + bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for RegSequence + /// instructions.
+ bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for InsertSubreg + /// instructions. + bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for ExtractSubreg + /// instructions. + bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for SubregToReg + /// instructions. + bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg); + + public: + /// \brief Create a ValueTracker instance for the value defined by \p MI + /// at the operand index \p DefIdx. + /// \p DefSubReg represents the sub register index the value tracker will + /// track. It does not need to match the sub register index used in \p MI. + /// \p UseAdvancedTracking specifies whether or not the value tracker looks + /// through complex instructions. By default (false), it handles only copy + /// and bitcast instructions. + /// \p MRI is useful to perform some complex checks. + ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, + bool UseAdvancedTracking = false, + const MachineRegisterInfo *MRI = nullptr) + : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) { + assert(Def->getOperand(DefIdx).isDef() && + Def->getOperand(DefIdx).isReg() && + "Definition does not match machine instruction"); + // Initially the value is in the defined register. + Reg = Def->getOperand(DefIdx).getReg(); + } + + /// \brief Following the use-def chain, get the next available source + /// for the tracked value. + /// When the returned value is not nullptr, getReg() gives the register + /// that contains the tracked value. + /// \note The sub register index returned in \p SrcSubReg must be used + /// on the register returned by getReg() to access the actual value. + /// \return Unless the returned value is nullptr (i.e., no source found), + /// \p SrcIdx gives the index of the next source in the returned + /// instruction and \p SrcSubReg the index to be used on that source to + /// get the tracked value. When nullptr is returned, no alternative source + /// has been found. + const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg); + + /// \brief Get the last register where the initial value can be found. + /// Initially this is the register of the definition. + /// Then, after each successful call to getNextSource, this is the + /// register of the last source. + unsigned getReg() const { return Reg; } + }; } char PeepholeOptimizer::ID = 0; @@ -443,31 +546,32 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { unsigned Src; unsigned SrcSubReg; bool ShouldRewrite = false; - MachineInstr *Copy = MI; const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); - // Follow the chain of copies until we reach the top or find a - // more suitable source. + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source.
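The loop in the hunk that follows drives this interface: each getNextSource call yields one further source along the use-def chain until the tracker gives up. A standalone model of such a walk over a chain of plain copies, using hypothetical stand-ins for MachineInstr and the register-to-definition map (not the LLVM types):

#include <cstdio>
#include <map>

// An instruction is either a copy reading SrcReg, or something opaque the
// tracker will not look through.
struct Instr {
  bool IsCopy;
  int SrcReg;
};

struct Tracker {
  const std::map<int, Instr> *Defs; // reg -> its defining instruction
  const Instr *Def;                 // current point in the use-def chain
  int Reg;                          // register currently holding the value

  // Mirrors the getNextSource() contract: return the definition we just
  // stepped over and advance, or nullptr once no further source exists
  // (after which every later call keeps returning nullptr).
  const Instr *next() {
    if (!Def || !Def->IsCopy) {
      Def = nullptr; // cut the chain so later calls bail out early
      return nullptr;
    }
    const Instr *Prev = Def;
    Reg = Def->SrcReg;
    auto It = Defs->find(Reg);
    Def = (It == Defs->end()) ? nullptr : &It->second;
    return Prev;
  }
};

int main() {
  // v2 = COPY v1; v1 = COPY v0; v0 = <opaque>
  std::map<int, Instr> Defs = {{2, {true, 1}}, {1, {true, 0}}, {0, {false, -1}}};
  Tracker T = {&Defs, &Defs.at(2), 2};
  while (T.next())
    std::printf("value also available in v%d\n", T.Reg); // prints v1, then v0
  return 0;
}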
+ ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI); do { - unsigned CopyDefIdx, CopySrcIdx; - if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx)) + unsigned CopySrcIdx, CopySrcSubReg; + if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg)) break; - const MachineOperand &MO = Copy->getOperand(CopySrcIdx); - assert(MO.isReg() && "Copies must be between registers."); - Src = MO.getReg(); - + Src = ValTracker.getReg(); + SrcSubReg = CopySrcSubReg; + + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. + // Moreover, if we want to extend the live-range of a physical register, + // unlike an SSA virtual register, we will have to check that it is not + // redefined before the related use. if (TargetRegisterInfo::isPhysicalRegister(Src)) break; const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); - SrcSubReg = MO.getSubReg(); // If this source does not incur a cross register bank copy, use it. ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, SrcSubReg); - // Follow the chain of copies: get the definition of Src. - Copy = MRI->getVRegDef(Src); - } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast())); + } while (!ShouldRewrite); // If we did not find a more suitable source, there is nothing to optimize. if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) @@ -483,6 +587,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { MRI->replaceRegWith(Def, NewVR); MRI->clearKillFlags(NewVR); + // We extended the lifetime of Src. + // Clear the kill flags to account for that. + MRI->clearKillFlags(Src); MI->eraseFromParent(); ++NumCopiesBitcasts; return true; @@ -673,3 +780,251 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isCopy() && "Invalid definition"); + // Copy instructions are supposed to be: Def = Src. + // If someone breaks this assumption, bad things will happen everywhere. + assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands"); + + if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + // If we look for a different subreg, it means we want a subreg of src. + // Bail out, as we do not support composing subregs yet. + return false; + // Otherwise, we want the whole source. + SrcIdx = 1; + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; +} + +bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isBitcast() && "Invalid definition"); + + // Bail if there are effects that a plain copy will not expose. + if (Def->hasUnmodeledSideEffects()) + return false; + + // Bitcasts with more than one def are not supported. + if (Def->getDesc().getNumDefs() != 1) + return false; + if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + // If we look for a different subreg, it means we want a subreg of the src. + // Bail out, as we do not support composing subregs yet. + return false; + + SrcIdx = Def->getDesc().getNumOperands(); + for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; + ++OpIdx) { + const MachineOperand &MO = Def->getOperand(OpIdx); + if (!MO.isReg() || !MO.getReg()) + continue; + assert(!MO.isDef() && "We should have skipped all the definitions by now"); + if (SrcIdx != EndOpIdx) + // Multiple sources?
+ return false; + SrcIdx = OpIdx; + } + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; +} + +bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isRegSequence() && "Invalid definition"); + + if (Def->getOperand(DefIdx).getSubReg()) + // If we are composing subregs, bail out. + // The case we are checking is Def.<subreg> = REG_SEQUENCE. + // This should almost never happen as the SSA property is tracked at + // the register level (as opposed to the subreg level). + // I.e., + // Def.sub0 = + // Def.sub1 = + // is a valid SSA representation for Def.sub0 and Def.sub1, but not for + // Def. Thus, it must not be generated. + // However, some code could theoretically generate a single + // Def.sub0 (i.e., not defining the other subregs) and we would + // have this case. + // If we can ascertain (or force) that this never happens, we could + // turn that into an assertion. + return false; + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + // Check if one of the operands defines the subreg we are interested in. + for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands(); + OpIdx != EndOpIdx; OpIdx += 2) { + const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1); + assert(MOSubIdx.isImm() && + "One of the subindices of the reg_sequence is not an immediate"); + if (MOSubIdx.getImm() == DefSubReg) { + assert(Def->getOperand(OpIdx).isReg() && + "One of the sources of the reg_sequence is not a register"); + SrcIdx = OpIdx; + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; + } + } + + // If the subreg we are tracking is super-defined by another subreg, + // we could follow this value. However, this would require composing + // the subregs and we do not do that for now. + return false; +} + +bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isInsertSubreg() && "Invalid definition"); + if (Def->getOperand(DefIdx).getSubReg()) + // If we are composing subregs, bail out. + // Same remark as getNextSourceFromRegSequence. + // I.e., this may be turned into an assert. + return false; + + // We are looking at: + // Def = INSERT_SUBREG v0, v1, sub1 + // There are two cases: + // 1. DefSubReg == sub1, get v1. + // 2. DefSubReg != sub1, the value may be available through v0. + + // #1 Check if the inserted register matches the required sub index. + unsigned InsertedSubReg = Def->getOperand(3).getImm(); + if (InsertedSubReg == DefSubReg) { + SrcIdx = 2; + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; + } + // #2 Otherwise, if the sub register we are looking for is not partially + // defined by the inserted element, we can look through the main + // register (v0). + // To check the overlap we need an MRI and a TRI. + if (!MRI) + return false; + + const MachineOperand &MODef = Def->getOperand(DefIdx); + const MachineOperand &MOBase = Def->getOperand(1); + // If the result register (Def) and the base register (v0) do not + // have the same register class or if we have to compose + // subregisters, bail out. + if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) || + MOBase.getSubReg()) + return false; + + // Get the TRI and check if the inserted sub register overlaps with the + // sub register we are tracking.
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + if (!TRI || + (TRI->getSubRegIndexLaneMask(DefSubReg) & + TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0) + return false; + // At this point, the value is available in v0 via the same subreg + // we used for Def. + SrcIdx = 1; + SrcSubReg = DefSubReg; + return true; +} + +bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isExtractSubreg() && "Invalid definition"); + // We are looking at: + // Def = EXTRACT_SUBREG v0, sub0 + + // Bail out if we have to compose sub registers. + // Indeed, if DefSubReg != 0, we would have to compose it with sub0. + if (DefSubReg) + return false; + + // Bail out if we have to compose sub registers. + // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. + if (Def->getOperand(1).getSubReg()) + return false; + // Otherwise, the value is available in v0.sub0. + SrcIdx = 1; + SrcSubReg = Def->getOperand(2).getImm(); + return true; +} + +bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isSubregToReg() && "Invalid definition"); + // We are looking at: + // Def = SUBREG_TO_REG Imm, v0, sub0 + + // Bail out if we have to compose sub registers. + // If DefSubReg != sub0, we would have to check that all the bits + // we track are included in sub0 and if yes, we would have to + // determine the right subreg in v0. + if (DefSubReg != Def->getOperand(3).getImm()) + return false; + // Bail out if we have to compose sub registers. + // Likewise, if v0.subreg != 0, we would have to compose it with sub0. + if (Def->getOperand(2).getSubReg()) + return false; + + SrcIdx = 2; + SrcSubReg = Def->getOperand(3).getImm(); + return true; +} + +bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) { + assert(Def && "This method needs a valid definition"); + + assert( + (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && + Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); + if (Def->isCopy()) + return getNextSourceFromCopy(SrcIdx, SrcSubReg); + if (Def->isBitcast()) + return getNextSourceFromBitcast(SrcIdx, SrcSubReg); + // All the remaining cases involve "complex" instructions. + // Bail out if we did not ask for the advanced tracking. + if (!UseAdvancedTracking) + return false; + if (Def->isRegSequence()) + return getNextSourceFromRegSequence(SrcIdx, SrcSubReg); + if (Def->isInsertSubreg()) + return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg); + if (Def->isExtractSubreg()) + return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg); + if (Def->isSubregToReg()) + return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg); + return false; +} + +const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx, + unsigned &SrcSubReg) { + // If we reach a point where we cannot move up in the use-def chain, + // there is nothing we can get. + if (!Def) + return nullptr; + + const MachineInstr *PrevDef = nullptr; + // Try to find the next source. + if (getNextSourceImpl(SrcIdx, SrcSubReg)) { + // Update definition, definition index, and subregister for the + // next call of getNextSource. + const MachineOperand &MO = Def->getOperand(SrcIdx); + assert(MO.isReg() && !MO.isDef() && "Source is invalid"); + // Update the current register. + Reg = MO.getReg(); + // Update the return value before moving up in the use-def chain. + PrevDef = Def; + // If we can still move up in the use-def chain, move to the next + // definition.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + Def = MRI->getVRegDef(Reg); + DefIdx = MRI->def_begin(Reg).getOperandNo(); + DefSubReg = SrcSubReg; + return PrevDef; + } + } + // If we end up here, this means we will not be able to find another source + // for the next iteration. + // Make sure any new call to getNextSource bails out early by cutting the + // use-def chain. + Def = nullptr; + return PrevDef; +} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index c74a42f..b98d210 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -160,7 +160,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { replaceFrameIndices(Fn); // If register scavenging is needed, as we've enabled doing it as a - // post-pass, scavenge the virtual registers that frame index elimiation + // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); @@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { } } - if (CSI.empty()) - return; // Early exit if no callee saved registers are modified! + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + // If target doesn't implement this, use generic code. - unsigned NumFixedSpillSlots; - const TargetFrameLowering::SpillSlot *FixedSpillSlots = - TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! - // Now that we know which registers need to be saved and restored, allocate - // stack slots for them. - for (std::vector<CalleeSavedInfo>::iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); - int FrameIdx; - if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { - I->setFrameIdx(FrameIdx); - continue; - } + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); + I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); - // Check to see if this physreg must be spilled to a particular stack slot - // on this target. - const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; - while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->Reg != Reg) - ++FixedSlot; - - if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { - // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); - unsigned StackAlign = TFI->getStackAlignment(); - - // We may not be able to satisfy the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. Use the - // min. - Align = std::min(Align, StackAlign); - FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); - if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; - if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; - } else { - // Spill it to the stack where we must. 
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); - } + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = + MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } - I->setFrameIdx(FrameIdx); + I->setFrameIdx(FrameIdx); + } } MFI->setCalleeSavedInfo(CSI); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index aa7c178..901b993 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <queue> using namespace llvm; @@ -79,6 +80,12 @@ ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring")); +static cl::opt<bool> EnableLocalReassignment( + "enable-local-reassign", cl::Hidden, + cl::desc("Local reassignment can yield better allocation decisions, but " + "may be compile time intensive"), + cl::init(false)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -285,6 +292,10 @@ class RAGreedy : public MachineFunctionPass, /// Callee-save register cost, calculated once per machine function. BlockFrequency CSRCost; + /// Whether or not to run the local reassignment heuristic. This information + /// is obtained from the TargetSubtargetInfo. + bool EnableLocalReassign; + public: RAGreedy(); @@ -731,7 +742,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Evicting another local live range in this case could lead to suboptimal // coloring.
if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && - !canReassign(*Intf, PhysReg)) { + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { return false; } } @@ -2308,9 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; - TRI = MF->getTarget().getRegisterInfo(); - TII = MF->getTarget().getInstrInfo(); + const TargetMachine &TM = MF->getTarget(); + TRI = TM.getRegisterInfo(); + TII = TM.getInstrInfo(); RCI.runOnMachineFunction(mf); + + EnableLocalReassign = EnableLocalReassignment || + TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel()); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index b2909e0..617e459 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -41,7 +41,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, const TargetRegisterInfo *TRI) { bool Empty = true; @@ -55,6 +55,7 @@ void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, dbgs() << "\n"; } +LLVM_DUMP_METHOD void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; dumpRegSetPressure(MaxSetPressure, TRI); @@ -68,6 +69,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << '\n'; } +LLVM_DUMP_METHOD void RegPressureTracker::dump() const { if (!isTopClosed() || !isBottomClosed()) { dbgs() << "Curr Pressure: "; @@ -75,7 +77,6 @@ void RegPressureTracker::dump() const { } P.dump(TRI); } -#endif /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 92a9a30..0f8b21c 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1508,7 +1508,7 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) { } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const { OS << InstrCount << " / " << Length << " = "; if (!Length) @@ -1517,16 +1517,17 @@ void ILPValue::print(raw_ostream &OS) const { OS << format("%g", ((double)InstrCount / Length)); } +LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } namespace llvm { +LLVM_DUMP_METHOD raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } } // namespace llvm -#endif // !NDEBUG || LLVM_ENABLE_DUMP diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2d2fd53..7c42e4d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -167,9 +167,18 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); - SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad the load that EVE came from, to be replaced. + /// \returns EVE on success, SDValue() on failure.
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -646,10 +655,14 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) { return CN; if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - ConstantSDNode *CN = BV->getConstantSplatValue(); + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); // BuildVectors can truncate their operands. Ignore that case here. - if (CN && CN->getValueType(0) == N.getValueType().getScalarType()) + // FIXME: We blindly ignore splats which include undef, which is overly + // pessimistic. + if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) return CN; } @@ -762,14 +775,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) { - SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode(); - // For an operand generating multiple values, one of the values may - // become dead allowing further simplification (e.g. split index - // arithmetic from an indexed load). - if (Op->hasOneUse() || Op->getNumValues() > 1) - AddToWorkList(Op); - } + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + DAG.DeleteNode(TLO.Old.getNode()); } } @@ -1320,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) { // Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { - SDValue Ops[] = { N1, N0 }; - SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), - Ops); + SDValue Ops[] = {N1, N0}; + SDNode *CSENode; + if (const BinaryWithFlagsSDNode *BinNode = + dyn_cast<BinaryWithFlagsSDNode>(N)) { + CSENode = DAG.getNodeIfExists( + N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + } else { + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); + } if (CSENode) return SDValue(CSENode, 0); } @@ -3942,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // If setcc produces all-one true value then: // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) if (N1CV && N1CV->isConstant()) { - if (N0.getOpcode() == ISD::AND && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { + if (N0.getOpcode() == ISD::AND) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); - if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) { + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); if (C.getNode()) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); @@ -4508,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. 
if (VT.isInteger() && - (VT0 == MVT::i1 || - (VT0.isInteger() && - TLI.getBooleanContents(false) == - TargetLowering::ZeroOrOneBooleanContent)) && + (VT0 == MVT::i1 || (VT0.isInteger() && + TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(false, true) && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4555,12 +4580,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { - // FIXME: - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4587,6 +4609,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { return std::make_pair(Lo, Hi); } +// This function assumes all the vselect's arguments are CONCAT_VECTORS +// nodes and that the condition is a BV of ConstantSDNodes (or undefs). +static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + MVT VT = N->getSimpleValueType(0); + int NumElems = VT.getVectorNumElements(); + assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && + RHS.getOpcode() == ISD::CONCAT_VECTORS && + Cond.getOpcode() == ISD::BUILD_VECTOR); + + // We're sure we have an even number of elements due to the + // concat_vectors we have as arguments to vselect. + // Skip BV elements until we find one that's not an UNDEF. + // After we find a non-UNDEF element, keep looping until we get to half the + // length of the BV and see if all the non-undef nodes are the same. + ConstantSDNode *BottomHalf = nullptr; + for (int i = 0; i < NumElems / 2; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (BottomHalf == nullptr) + BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != BottomHalf) + return SDValue(); + } + + // Do the same for the second half of the BuildVector. + ConstantSDNode *TopHalf = nullptr; + for (int i = NumElems / 2; i < NumElems; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (TopHalf == nullptr) + TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != TopHalf) + return SDValue(); + } + + assert(TopHalf && BottomHalf && + "One half of the selector was all UNDEFs and the other was all the " + "same value. This should have been addressed before this function."); + return DAG.getNode( + ISD::CONCAT_VECTORS, dl, VT, + BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isNullValue() ?
RHS->getOperand(1) : LHS->getOperand(1)); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4659,6 +4731,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (ISD::isBuildVectorAllZeros(N0.getNode())) return N2; + // The ConvertSelectToConcatVector function is assuming both the above + // checks for (vselect (build_vector all{ones,zeros}) ...) have been made + // and addressed. + if (N1.getOpcode() == ISD::CONCAT_VECTORS && + N2.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SDValue CV = ConvertSelectToConcatVector(N, DAG); + if (CV.getNode()) + return CV; + } + return SDValue(); } @@ -5003,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + EVT N0VT = N0.getOperand(0).getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { - EVT N0VT = N0.getOperand(0).getValueType(); + TLI.getBooleanContents(N0VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. @@ -6140,6 +6223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && + // Do not remove the cast if the types differ in endian layout. + TLI.hasBigEndianPartOrdering(N0.getValueType()) == + TLI.hasBigEndianPartOrdering(VT) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -6955,11 +7041,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { } // The next optimizations are desirable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc) if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && @@ -7012,11 +7094,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { } // The next optimizations are desirable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc) if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && @@ -7849,17 +7927,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// \brief Return the base-pointer arithmetic from an indexed \p LD.
-SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { - ISD::MemIndexedMode AM = LD->getAddressingMode(); - assert(AM != ISD::UNINDEXED); - SDValue BP = LD->getOperand(1); - SDValue Inc = LD->getOperand(2); - unsigned Opc = - (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); - return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -7896,16 +7963,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - SDValue Index; - if (N->hasAnyUseOfValue(1)) { - Index = SplitIndexingFromLoad(LD); - // Try to fold the base pointer arithmetic into subsequent loads and - // stores. - AddUsersToWorkList(N); - } else - Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7913,7 +7972,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1))); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); removeFromWorkList(N); DAG.DeleteNode(N); @@ -9666,6 +9726,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return SDValue(); unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + // Canonicalize insert_vector_elt dag nodes. + // Example: + // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) + // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) + // + // Do this only if the child insert_vector node has one use; also + // do this only if indices are both constants and Idx1 < Idx0. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() + && isa<ConstantSDNode>(InVec.getOperand(2))) { + unsigned OtherElt = + cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); + if (Elt < OtherElt) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + InVec.getOperand(0), InVal, EltNo); + AddToWorkList(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), + VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); + } + } + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. 
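To make the canonicalization above concrete, here is the rewrite it performs on a small example (the value names are illustrative; only the rule itself comes from the patch): the chain (insert_vector_elt (insert_vector_elt A, v1, 2), v0, 0) is rebuilt as (insert_vector_elt (insert_vector_elt A, v0, 0), v1, 2), so that chains of constant-index inserts always carry the smaller index on the inner node. Equivalent chains thereby share a single canonical shape, which makes the BUILD_VECTOR fold mentioned just above more likely to apply.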
@@ -9698,6 +9779,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + if (TLI.isBigEndian()) + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; + Offset = DAG.getConstant(PtrOff, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getNode( + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); + if (TLI.isBigEndian()) + Offset = DAG.getNode( + ISD::SUB, SDLoc(EVE), EltNo.getValueType(), + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + MPI = OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractelement is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), + NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(), + OriginalLoad->isNonTemporal(), Align, + OriginalLoad->getTBAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(EVE); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); @@ -9766,6 +9927,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && + ISD::isNormalLoad(InVec.getNode())) { + SDValue Index = N->getOperand(1); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -9776,30 +9969,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - bool NewLoad = false; - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - - // If the result of load has to be truncated, then it's not necessarily - // profitable. - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) - return SDValue(); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) - return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) - BCNumEltsChanged = true; - InVec = InVec.getOperand(0); - ExtVT = BCVT.getVectorElementType(); - NewLoad = true; - } LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; @@ -9842,6 +10011,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + EltNo = DAG.getConstant(Elt, EltNo.getValueType()); } } @@ -9854,72 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (Elt == -1) return DAG.getUNDEF(LVT); - unsigned Align = LN0->getAlignment(); - if (NewLoad) { - // Check the resultant load doesn't need a higher alignment than the - // original load. 
- unsigned NewAlign = - TLI.getDataLayout() - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) - return SDValue(); - - Align = NewAlign; - } - - SDValue NewPtr = LN0->getBasePtr(); - unsigned PtrOff = 0; - - if (Elt) { - PtrOff = LVT.getSizeInBits() * Elt / 8; - EVT PtrType = NewPtr.getValueType(); - if (TLI.isBigEndian()) - PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, - DAG.getConstant(PtrOff, PtrType)); - } - - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. - // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. - SDValue Load; - SDValue Chain; - if (NVT.bitsGT(LVT)) { - // If the result type of vextract is wider than the load, then issue an - // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) - ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), - NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(), - Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - } else { - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); - else - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); - } - WorkListRemover DeadNodes(*this); - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explcitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorkList(N); - return SDValue(N, 0); + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); } return SDValue(); @@ -10280,10 +10385,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Opnds; unsigned BuildVecNumElts = N0.getNumOperands(); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { + // If the BUILD_VECTORs are built from integers, they may have different + // operand types. Get the smaller type and truncate all operands to it. + EVT MinTy = SclTy0.bitsLE(SclTy1) ?
SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); + } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } @@ -10558,22 +10677,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If this shuffle node is simply a swizzle of another shuffle node, - // and it reverses the swizzle of the previous shuffle then we can - // optimize shuffle(shuffle(x, undef), undef) -> x. + // then try to simplify it. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && N1.getOpcode() == ISD::UNDEF) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - // Shuffle nodes can only reverse shuffles with a single non-undef value. - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) - return SDValue(); - // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask. for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); assert(Idx < (int)NumElts && "Index references undef operand"); @@ -10581,13 +10697,71 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle. Adopt the incoming index. if (Idx >= 0) Idx = OtherSV->getMaskElt(Idx); + Mask.push_back(Idx); + } + + bool CommuteOperands = false; + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { + // To be valid, the combined shuffle mask should only reference elements + // from one of the two vectors input to the inner shufflevector. + bool IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // See if the combined mask only references undefs or elements coming + // from the first shufflevector operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; + + if (!IsValidMask) { + IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // Check that all the elements come from the second shuffle operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; + CommuteOperands = IsValidMask; + } - // The combined shuffle must map each index to itself. - if (Idx >= 0 && (unsigned)Idx != i) + // Early exit if the combined shuffle mask is not valid. + if (!IsValidMask) return SDValue(); } - return OtherSV->getOperand(0); + // See if this pair of shuffles can be safely folded according to either + // of the following rules: + // shuffle(shuffle(x, y), undef) -> x + // shuffle(shuffle(x, undef), undef) -> x + // shuffle(shuffle(x, y), undef) -> y + bool IsIdentityMask = true; + unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; + for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { + // Skip Undefs. + if (Mask[i] < 0) + continue; + + // The combined shuffle must map each index to itself. + IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; + } + + if (IsIdentityMask) { + if (CommuteOperands) + // optimize shuffle(shuffle(x, y), undef) -> y. + return OtherSV->getOperand(1); + + // optimize shuffle(shuffle(x, undef), undef) -> x + // optimize shuffle(shuffle(x, y), undef) -> x + return OtherSV->getOperand(0); + } + + // It may still be beneficial to combine the two shuffles if the + // resulting shuffle is legal.
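As a worked illustration of the combined mask (example values, not taken from the patch): for a v4i32 vector x = <a,b,c,d> with inner mask M1 = <3,2,1,0> and outer mask M2 = <1,0,3,2>, the loop above computes Mask[i] = M1[M2[i]], giving <2,3,0,1>. Indeed, shuffle(shuffle(x, undef, M1), undef, M2) first produces <d,c,b,a> and then <c,d,a,b>, which is exactly shuffle(x, undef, <2,3,0,1>), the single shuffle whose legality is then queried below via isShuffleMaskLegal.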
+ if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (!CommuteOperands) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), + &Mask[0]); + } } return SDValue(); @@ -10729,6 +10903,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); } + // Type legalization might introduce new shuffles in the DAG. + // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) + // -> (shuffle (VBinOp (A, B)), Undef, Mask). + if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && + isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::UNDEF && + RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); + + if (SVN0->getMask().equals(SVN1->getMask())) { + EVT VT = N->getValueType(0); + SDValue UndefVector = LHS.getOperand(1); + SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + LHS.getOperand(0), RHS.getOperand(0)); + AddUsersToWorkList(N); + return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, + &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -11080,8 +11275,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents(N0.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent) { + TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 99931c1..445572a 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -42,12 +42,15 @@ #include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -558,6 +561,107 @@ bool FastISel::SelectGetElementPtr(const User *I) { return true; } +/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands +/// to a stackmap or patchpoint machine instruction. +bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, + const CallInst *CI, unsigned StartIdx) { + for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + Value *Val = CI->getArgOperand(i); + // Check for constants and encode them with a StackMaps::ConstantOp prefix. 
+ if (auto *C = dyn_cast<ConstantInt>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); + } else if (isa<ConstantPointerNull>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(0)); + } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { + // Values coming from a stack location also require a special encoding, + // but that is added later on by the target-specific frame index + // elimination implementation. + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + Ops.push_back(MachineOperand::CreateFI(SI->second)); + else + return false; + } else { + unsigned Reg = getRegForValue(Val); + if (Reg == 0) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + return true; +} + +bool FastISel::SelectStackmap(const CallInst *I) { + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, + // [live variables...]) + assert(I->getCalledFunction()->getReturnType()->isVoidTy() && + "Stackmap cannot return a value."); + + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPs (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target-specific lowering code. + // Instead we perform the call lowering right here. + // + // CALLSEQ_START(0) + // STACKMAP(id, nbytes, ...) + // CALLSEQ_END(0, 0) + // + SmallVector<MachineOperand, 32> Ops; + + // Add the <id> and <numBytes> constants. + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Push live variables for the stack map (skipping the first two arguments + // <id> and <numBytes>). + if (!addStackMapLiveVars(Ops, I, 2)) + return false; + + // We are not adding any register mask info here, because the stackmap doesn't + // clobber anything. + + // Add scratch registers as implicit def and early clobber. + CallingConv::ID CC = I->getCallingConv(); + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) + .addImm(0); + + // Issue STACKMAP. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::STACKMAP)); + for (auto const &MO : Ops) + MIB.addOperand(MO); + + // Issue CALLSEQ_END + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + .addImm(0).addImm(0); + + // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap(); + + return true; +} + bool FastISel::SelectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); @@ -713,6 +817,8 @@ bool FastISel::SelectCall(const User *I) { UpdateValueMap(Call, ResultReg); return true; } + case Intrinsic::experimental_stackmap: + return SelectStackmap(Call); } // Usually, it does not make sense to initialize a value, @@ -879,7 +985,6 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -890,7 +995,11 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector<MachineOperand, 0>(), DbgLoc); } - FuncInfo.MBB->addSuccessor(MSucc); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), + MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } /// SelectFNeg - Emit an FNeg operation. @@ -1101,6 +1210,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { FastISel::FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), + MF(funcInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), @@ -1635,3 +1745,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); } +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + unsigned Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast<LoadInst>(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast<StoreInst>(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else { + return nullptr; + } + + bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; + bool IsInvariant = I->getMetadata("invariant.load") != nullptr; + const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
+ Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, TBAAInfo, Ranges); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a59e895..c0e8c8c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2060,7 +2060,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2095,7 +2095,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2129,7 +2129,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2266,7 +2266,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2381,7 +2381,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0); + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2650,12 +2650,15 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + // A larger signed type can hold all unsigned values of the requested type, + // so using FP_TO_SINT is valid if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { OpToUse = ISD::FP_TO_SINT; break; } - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. 
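(A pair of concrete cases, offered as illustration rather than taken from the patch: converting 3.0e9f to an unsigned i32 can be promoted to FP_TO_SINT at i64 and then truncated, since 3000000000 is representable in a signed 64-bit integer; by contrast, a signed conversion of -1.5f may never be lowered through FP_TO_UINT, whose result is undefined for negative inputs, which is why the FP_TO_UINT alternative below is additionally gated on !isSigned.)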
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { OpToUse = ISD::FP_TO_UINT; break; } @@ -2996,8 +2999,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), - &Args, 0); + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -3007,14 +3010,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - cast<AtomicSDNode>(Node)->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; @@ -3051,6 +3054,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and + // splits out the success value as a comparison. Expanding the resulting + // ATOMIC_CMP_SWAP will produce a libcall. 
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getSuccessOrdering(), + cast<AtomicSDNode>(Node)->getFailureOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + + SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), + Res, Node->getOperand(2), ISD::SETEQ); + + Results.push_back(Res.getValue(0)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -3074,7 +3098,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0); + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -3128,6 +3153,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT: { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + + // FIXME: Only f32 to i64 conversions are supported. + if (VT != MVT::f32 || NVT != MVT::i64) + break; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, TLI.getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, TLI.getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, 
IntVT), + DAG.getConstant(0, NVT), Ret, ISD::SETLT)); + break; + } case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); @@ -3653,7 +3737,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - EVT OType = Node->getValueType(1); + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -3676,7 +3761,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(Cmp); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: @@ -3687,9 +3772,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, - Node->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT)); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType(Node->getValueType(0)); + ISD::CondCode CC + = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: @@ -3879,7 +3969,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(VT.isVector())) { + switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; @@ -3899,13 +3989,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False + EVT VT = Node->getValueType(0); SDValue CC = Node->getOperand(4); + ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); + + if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + // If the condition code is legal, then we need to expand this + // node using SETCC and SELECT. + EVT CmpVT = Tmp1.getValueType(); + assert(!TLI.isOperationExpand(ISD::SELECT, VT) && + "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " + "expanded."); + EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); + Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + break; + } + // SELECT_CC is legal, so the condition code must not be. bool Legalized = false; // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). 
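(For instance, purely as an assumed target configuration and not something stated in the patch: a target whose floating-point compares only exist in ordered forms could mark SETULT illegal while SETOGE is legal; the inversion below then rewrites select_cc(a, b, t, f, SETULT) as select_cc(a, b, f, t, SETOGE), since SETOGE is the logical inverse of SETULT and swapping the true and false operands compensates for inverting the condition.)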
- ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 2483184..6feac0d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -138,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; case ISD::ATOMIC_CMP_SWAP: - Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); + break; } // If the result is null then the sub-method took care of registering it. @@ -192,16 +194,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, + unsigned ResNo) { + if (ResNo == 1) { + assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + EVT SVT = getSetCCResultType(N->getOperand(2).getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, + N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), + N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), + N->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res.getValue(1); + } + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), - N->getChain(), N->getBasePtr(), Op2, Op3, - N->getMemOperand(), N->getSuccessOrdering(), - N->getFailureOrdering(), N->getSynchScope()); + SDVTList VTs = + DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), + N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + unsigned ChainOp = N->getNumValues() - 1; + ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp)); return Res; } @@ -492,7 +519,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. 
- Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, OpTy); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, SDLoc(N), @@ -892,8 +919,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(MVT::Other); - SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other); // The chain (Op#0) and basic block destination (Op#2) are always legal types. return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, @@ -986,9 +1012,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? - OpTy.getScalarType() : OpTy); - Cond = PromoteTargetBoolean(Cond, SVT); + EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0); @@ -1143,6 +1168,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { ReplaceValueWith(SDValue(N, 1), Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue Tmp = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, + N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), + AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), + AN->getSynchScope()); + + // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine + // success simply by comparing the loaded value against the ingoing + // comparison. 
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp, + N->getOperand(2), ISD::SETEQ); + + SplitInteger(Tmp, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Success); + ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1)); + break; + } case ISD::AND: case ISD::OR: @@ -2301,7 +2346,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) .setSExtResult(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2388,16 +2433,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); SDValue Zero = DAG.getConstant(0, VT); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, - N->getOperand(0), - N->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 3971fc3..bd7dacf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1054,7 +1054,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -1065,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. -SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { +/// +/// ValVT is the type of values that produced the boolean. 
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { SDLoc dl(Bool); + EVT BoolVT = getSetCCResultType(ValVT); ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); - return DAG.getNode(ExtendCode, dl, VT, Bool); + TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); + return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e4bbc78..d0ca6f8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -167,7 +167,7 @@ private: SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -220,7 +220,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); - SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); @@ -570,6 +570,7 @@ private: void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); @@ -644,6 +645,7 @@ private: bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index f40ed76..7e2f7b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -60,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. 
GetExpandedOp(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(InVT) != + TLI.hasBigEndianPartOrdering(OutVT)) + std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -82,7 +85,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -176,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); } @@ -245,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); unsigned Alignment = LD->getAlignment(); @@ -275,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -295,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -459,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), - St->getValue().getValueType()); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); unsigned Alignment = St->getAlignment(); @@ -474,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 898cd29..507e7ff 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -37,12 +37,12 @@ class VectorLegalizer { const TargetLowering &TLI; bool Changed; // Keep track of whether anything changed - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. 
This - /// allows us to avoid legalizing the same thing more than once. + /// For nodes that are of legal width, and that have more than one use, this + /// map indicates what regularized operand to use. This allows us to avoid + /// legalizing the same thing more than once. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; - // Adds a node to the translation cache + /// \brief Adds a node to the translation cache. void AddLegalizedOperand(SDValue From, SDValue To) { LegalizedNodes.insert(std::make_pair(From, To)); // If someone requests legalization of the new node, return itself. @@ -50,41 +50,81 @@ class VectorLegalizer { LegalizedNodes.insert(std::make_pair(To, To)); } - // Legalizes the given node + /// \brief Legalizes the given node. SDValue LegalizeOp(SDValue Op); - // Assuming the node is legal, "legalize" the results + + /// \brief Assuming the node is legal, "legalize" the results. SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - // Implements unrolling a VSETCC. + + /// \brief Implements unrolling a VSETCC. SDValue UnrollVSETCC(SDValue Op); - // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB - // isn't legal. - // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if - // SINT_TO_FLOAT and SHR on vectors isn't legal. + + /// \brief Implement expand-based legalization of vector operations. + /// + /// This is just a high-level routine to dispatch to specific code paths for + /// operations to legalize them. + SDValue Expand(SDValue Op); + + /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// FSUB isn't legal. + /// + /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); - // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + + /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); - // Expand bswap of vectors into a shuffle if legal. + + /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and bitcasts to the proper + /// type. The contents of the bits in the extended part of each element are + /// undef. + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place, bitcasts to the proper + /// type, then shifts left and arithmetic shifts right to introduce a sign + /// extension. + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Expand bswap of vectors into a shuffle if legal. SDValue ExpandBSWAP(SDValue Op); - // Implement vselect in terms of XOR, AND, OR when blend is not supported - // by the target. + + /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// supported by the target. SDValue ExpandVSELECT(SDValue Op); SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); - // Implements vector promotion; this is essentially just bitcasting the - // operands to a different type and bitcasting the result back to the - // original type. 
- SDValue PromoteVectorOp(SDValue Op); - // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input - // operand to the next size up. - SDValue PromoteVectorOpINT_TO_FP(SDValue Op); - // Implements FP_TO_[SU]INT vector promotion of the result type; it is - // promoted to the next size up integer type. The result is then truncated - // back to the original type. - SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned); - - public: + + /// \brief Implements vector promotion. + /// + /// This is essentially just bitcasting the operands to a different type and + /// bitcasting the result back to the original type. + SDValue Promote(SDValue Op); + + /// \brief Implements [SU]INT_TO_FP vector promotion. + /// + /// This is a [zs]ext of the input operand to the next size up. + SDValue PromoteINT_TO_FP(SDValue Op); + + /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// + /// It is promoted to the next size up integer type. The result is then + /// truncated back to the original type. + SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + +public: + /// \brief Begin legalizing the vector operations in the DAG. bool Run(); VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} @@ -254,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -267,27 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - switch (Op.getOpcode()) { - default: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; - break; - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - // "Promote" the operation by extending the operand. - Result = PromoteVectorOpINT_TO_FP(Op); - Changed = true; - break; - case ISD::FP_TO_UINT: - case ISD::FP_TO_SINT: - // Promote the operation by extending the operand. - Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); - Changed = true; - break; - } + Result = Promote(Op); + Changed = true; + break; + case TargetLowering::Legal: break; - case TargetLowering::Legal: break; case TargetLowering::Custom: { SDValue Tmp1 = TLI.LowerOperation(Op, DAG); if (Tmp1.getNode()) { @@ -297,23 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) - Result = ExpandSEXTINREG(Op); - else if (Node->getOpcode() == ISD::BSWAP) - Result = ExpandBSWAP(Op); - else if (Node->getOpcode() == ISD::VSELECT) - Result = ExpandVSELECT(Op); - else if (Node->getOpcode() == ISD::SELECT) - Result = ExpandSELECT(Op); - else if (Node->getOpcode() == ISD::UINT_TO_FP) - Result = ExpandUINT_TO_FLOAT(Op); - else if (Node->getOpcode() == ISD::FNEG) - Result = ExpandFNEG(Op); - else if (Node->getOpcode() == ISD::SETCC) - Result = UnrollVSETCC(Op); - else - Result = DAG.UnrollVectorOp(Op.getNode()); - break; + Result = Expand(Op); } // Make sure that the generated code is itself legal. @@ -328,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return Result; } -SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { - // Vector "promotion" is basically just bitcasting and doing the operation - // in a different type.
For example, x86 promotes ISD::AND on v2i32 to - // v1i64. +SDValue VectorLegalizer::Promote(SDValue Op) { + // For a few operations there is a specific concept for promotion based on + // the operand's type. + switch (Op.getOpcode()) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + return PromoteINT_TO_FP(Op); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + } + + // The rest of the time, vector "promotion" is basically just bitcasting and + // doing the operation in a different type. For example, x86 promotes + // ISD::AND on v2i32 to v1i64. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -351,7 +375,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } -SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { +SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. EVT VT = Op.getOperand(0).getValueType(); @@ -387,7 +411,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assuming that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) { +SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); EVT VT = Op.getValueType(); @@ -609,6 +633,33 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::Expand(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); + case ISD::BSWAP: + return ExpandBSWAP(Op); + case ISD::VSELECT: + return ExpandVSELECT(Op); + case ISD::SELECT: + return ExpandSELECT(Op); + case ISD::UINT_TO_FP: + return ExpandUINT_TO_FLOAT(Op); + case ISD::FNEG: + return ExpandFNEG(Op); + case ISD::SETCC: + return UnrollVSETCC(Op); + default: + return DAG.UnrollVectorOp(Op.getNode()); + } +} + SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it @@ -686,6 +737,85 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector anyext in register to a shuffle of the relevant +// lanes into the appropriate locations, with other lanes left undef. +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build a base mask of undef shuffles.
+ SmallVector<int, 16> ShuffleMask; + ShuffleMask.resize(NumSrcElements, -1); + + // Place the extended lanes into the correct locations. + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = i; + + return DAG.getNode( + ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // First build an any-extend node which can be legalized above when we + // recurse through it. + Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + + // Now we need sign extend. Do this by shifting the elements. Even if these + // aren't legal operations, they have a better chance of being legalized + // without full scalarization than the sign extension does. + unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); + unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + return DAG.getNode(ISD::SRA, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), + ShiftAmount); +} + +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. + EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { EVT VT = Op.getValueType(); @@ -729,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // FIXME: Sign extend 1 to all ones if that's legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getBooleanContents(true) != - TargetLowering::ZeroOrNegativeOneBooleanContent) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(Op1.getValueType()) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); // If the mask and the type are different sizes, unroll the vector op. This
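The lane arithmetic in ExpandANY_EXTEND_VECTOR_INREG above is easy to check in isolation. The following is a minimal standalone sketch of the same mask construction in plain C++ (no LLVM dependencies; the helper name buildAnyExtMask and the v8i16-to-v4i32 example are illustrative only, not part of the patch): each of the NumElements source lanes is placed at position i * ExtLaneScale, biased by ExtLaneScale - 1 on big-endian targets, and every other position keeps -1, the undef-lane marker used by getVectorShuffle masks.

#include <cstdio>
#include <vector>

// Build the shuffle mask used to any-extend the low lanes in-register.
// -1 marks an undef lane, matching getVectorShuffle's mask convention.
static std::vector<int> buildAnyExtMask(int NumElements, int NumSrcElements,
                                        bool BigEndian) {
  std::vector<int> Mask(NumSrcElements, -1);
  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = BigEndian ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    Mask[i * ExtLaneScale + EndianOffset] = i;
  return Mask;
}

int main() {
  // Any-extend the low 4 lanes of a v8i16 into a v4i32 result:
  // ExtLaneScale = 8 / 4 = 2, so source lane i lands at position 2 * i.
  for (int M : buildAnyExtMask(/*NumElements=*/4, /*NumSrcElements=*/8,
                               /*BigEndian=*/false))
    std::printf("%d ", M); // prints: 0 -1 1 -1 2 -1 3 -1
  std::printf("\n");
  return 0;
}

The zero-extend expansion reuses the same placement: its mask starts as the identity over a zero build_vector, and the placed positions are redirected to NumSrcElements + i, i.e. to the second shuffle operand, so the leftover lanes blend in zeros instead of staying undef.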
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 368eba3..f77c592 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -257,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDValue Cond = GetScalarizedVector(N->getOperand(0)); SDValue LHS = GetScalarizedVector(N->getOperand(1)); - TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); - TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison. + if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + if (ScalarBool != VecBool) { EVT CondVT = Cond.getValueType(); switch (ScalarBool) { @@ -357,7 +375,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Vectors may have different boolean contents from scalars. Promote the // value appropriately. ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); return DAG.getNode(ExtendCode, DL, NVT, Res); } @@ -545,6 +563,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -765,6 +784,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, TLI.getVectorIdxTy())); } +void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT SubVecVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new subvector into the specified index.
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = + DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -1511,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: - case ISD::BSWAP: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: @@ -1558,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -2343,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; } @@ -2372,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just extending the low lanes is the correct + // transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. 
+ EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + + // Use special DAG nodes to represent the operation of extending the + // low lanes. + switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on an extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index f92230c..624003f 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -442,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { ResCount -= (regPressureDelta(SU) * ScaleTwo); } - // These are platform specific things. + // These are platform-specific things. // Will need to go into the back end // and be accessed from here via a hook.
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 78ec4df..13cfae7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -170,7 +170,8 @@ public: if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGRRList() { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 51c51d6..4589b0c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -73,7 +73,8 @@ public: : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGVLIW() { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b1b8035..daff1f2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -48,6 +48,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> #include <cmath> + using namespace llvm; /// makeVTList - Return an instance of the SDVTList struct initialized with the @@ -147,33 +148,34 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned i = 0, e = N->getNumOperands(); - - // Skip over all of the undef values. - while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) - ++i; + bool IsAllUndef = true; + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + IsAllUndef = false; + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target + // and a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all zeros, not whether the individual + // constants are. + SDValue Zero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (CN->getAPIntValue().countTrailingZeros() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) + return false; + } else + return false; + } // Do not accept an all-undef vector. - if (i == e) return false; - - // Do not accept build_vectors that aren't all constants or which have non-0 - // elements. 
- SDValue Zero = N->getOperand(i); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { - if (!CN->isNullValue()) - return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { - if (!CFPN->getValueAPF().isPosZero()) - return false; - } else + if (IsAllUndef) return false; - - // Okay, we have at least one 0 value, check to see if the rest match or are - // undefs. - for (++i; i != e; ++i) - if (N->getOperand(i) != Zero && - N->getOperand(i).getOpcode() != ISD::UNDEF) - return false; return true; } @@ -381,6 +383,20 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } } +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw, + bool exact) { + ID.AddBoolean(nuw); + ID.AddBoolean(nsw); + ID.AddBoolean(exact); +} + +/// AddBinaryNodeIDCustom - Add a BinarySDNode's special info. +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode, + bool nuw, bool nsw, bool exact) { + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); +} + static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); @@ -473,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::SDIV: + case ISD::UDIV: + case ISD::SRA: + case ISD::SRL: + case ISD::MUL: + case ISD::ADD: + case ISD::SUB: + case ISD::SHL: { + const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); + AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + break; + } case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -527,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { // Add the return value info. AddNodeIDValueTypes(ID, N->getVTList()); // Add the operand info. - AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end())); + AddNodeIDOperands(ID, N->ops()); // Handle SDNode leaves with special info.
AddNodeIDCustom(ID, N); @@ -926,6 +956,25 @@ void SelectionDAG::allnodes_clear() { DeallocateNode(AllNodes.begin()); } +BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, + SDVTList VTs, SDValue N1, + SDValue N2, bool nuw, bool nsw, + bool exact) { + if (isBinOpWithFlags(Opcode)) { + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + FN->setHasNoUnsignedWrap(nuw); + FN->setHasNoSignedWrap(nsw); + FN->setIsExact(exact); + + return FN; + } + + BinarySDNode *N = new (NodeAllocator) + BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -963,11 +1012,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) { +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, + EVT OpVT) { if (VT.bitsLE(Op.getValueType())) return getNode(ISD::TRUNCATE, SL, VT, Op); - TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); return getNode(TLI->getExtendForContent(BType), SL, VT, Op); } @@ -983,6 +1033,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, Op.getValueType())); } +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
/// SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { @@ -995,7 +1075,7 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue TrueValue; - switch (TLI->getBooleanContents(VT.isVector())) { + switch (TLI->getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = getConstant(1, VT); @@ -1190,15 +1270,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias then use the aliasee for determining thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee()); - } - unsigned Opc; - if (GVar && GVar->isThreadLocal()) + if (GV->isThreadLocal()) Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; else Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; @@ -1454,6 +1527,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, N1 = getUNDEF(VT); commuteShuffle(N1, N2, MaskVec); } + // Reset our undef status after accounting for the mask. + N2Undef = N2.getOpcode() == ISD::UNDEF; + // Re-check whether both sides ended up undef. + if (N1.getOpcode() == ISD::UNDEF && N2Undef) + return getUNDEF(VT); // If this is an identity shuffle, return that node. bool Identity = true; @@ -1464,9 +1542,36 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return N1; // Shuffling a constant splat doesn't change the result. - if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR) - if (cast<BuildVectorSDNode>(N1)->getConstantSplatValue()) - return N1; + if (N2Undef) { + SDValue V = N1; + + // Look through any bitcasts. We check that these don't change the number + // (and size) of elements and just change their types. + while (V.getOpcode() == ISD::BITCAST) + V = V->getOperand(0); + + // A splat should always show up as a build vector node. + if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + // If this is a splat of an undef, shuffling it is also undef. + if (Splat && Splat.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // We only have a splat which can skip shuffles if there is a splatted + // value and no undef lanes rearranged by the shuffle. + if (Splat && UndefElements.none()) { + // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the + // number of elements matches or the value splatted is a zero constant. + if (V.getValueType().getVectorNumElements() == + VT.getVectorNumElements()) + return N1; + if (auto *C = dyn_cast<ConstantSDNode>(Splat)) + if (C->isNullValue()) + return N1; + } + } + } FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1692,7 +1797,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETTRUE: case ISD::SETTRUE2: { const TargetLowering *TLI = TM.getTargetLowering(); - TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + TLI->getBooleanContents(N1->getValueType(0)); return getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ?
-1ULL : 1, VT); } @@ -1923,11 +2029,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::UMULO: if (Op.getResNo() != 1) break; - // The boolean result conforms to getBooleanContents. Fall through. + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integers. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::SHL: @@ -2043,7 +2158,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsLoad(*Ranges, KnownZero); + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); } break; } @@ -2192,8 +2307,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - KnownZero |= ~LowBits; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1); + + // The upper bits are all zero, the lower ones are unchanged. + KnownZero = KnownZero2 | ~LowBits; + KnownOne = KnownOne2 & LowBits; break; } } @@ -2323,9 +2441,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. Fall through. + // If setcc returns 0/-1, all bits are sign bits. + // We know that we have an integer-based boolean since these operations + // are only available for integers. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2940,7 +3065,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, - SDValue N2) { + SDValue N2, bool nuw, bool nsw, bool exact) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { @@ -3380,22 +3505,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Memoize this node if possible.
- SDNode *N; + BinarySDNode *N; SDVTList VTs = getVTList(VT); + const bool BinOpHasFlags = isBinOpWithFlags(Opcode); if (VT != MVT::Glue) { - SDValue Ops[] = { N1, N2 }; + SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); + if (BinOpHasFlags) + AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2); + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2); + + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } AllNodes.push_back(N); @@ -3583,7 +3711,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - else if (VT == MVT::f32 || VT == MVT::f64) + else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) return DAG.getConstantFP(0.0, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); @@ -4110,7 +4238,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy()), &Args, 0) + TLI->getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -4166,7 +4294,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy()), &Args, 0) + TLI->getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -4230,7 +4358,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy()), &Args, 0) + TLI->getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -4281,51 +4409,47 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, SDValue Ptr, SDValue Cmp, - SDValue Swp, MachinePointerInfo PtrInfo, - unsigned Alignment, - AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { +SDValue SelectionDAG::getAtomicCmpSwap( + unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, + unsigned Alignment, AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // All atomics are load and store, except for ATOMIC_LOAD and ATOMIC_STORE. - // For now, atomics are considered to be volatile always.
// FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. unsigned Flags = MachineMemOperand::MOVolatile; - if (Opcode != ISD::ATOMIC_STORE) - Flags |= MachineMemOperand::MOLoad; - if (Opcode != ISD::ATOMIC_LOAD) - Flags |= MachineMemOperand::MOStore; + Flags |= MachineMemOperand::MOLoad; + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, - SuccessOrdering, FailureOrdering, SynchScope); + return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO, + SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Cmp, - SDValue Swp, MachineMemOperand *MMO, - AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { - assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTs, SDValue Chain, SDValue Ptr, + SDValue Cmp, SDValue Swp, + MachineMemOperand *MMO, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - EVT VT = Cmp.getValueType(); - - SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering, - FailureOrdering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, + SuccessOrdering, FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -5610,10 +5734,13 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + ArrayRef<SDValue> Ops, bool nuw, bool nsw, + bool exact) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return E; @@ -5960,7 +6087,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { SDNode *N = I++; - checkForCycles(N); + checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. @@ -5980,7 +6107,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // such that by the time the end is reached all nodes will be sorted. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { SDNode *N = I; - checkForCycles(N); + checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); @@ -6005,7 +6132,9 @@ unsigned SelectionDAG::AssignTopologicalOrder() { #ifndef NDEBUG SDNode *S = ++I; dbgs() << "Overran sorted position:\n"; - S->dumprFull(); + S->dumprFull(this); dbgs() << "\n"; + dbgs() << "Checking if this is due to cycles\n"; + checkForCycles(this, true); #endif llvm_unreachable(nullptr); } @@ -6554,16 +6683,43 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } -ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const { - SDValue Op0 = getOperand(0); - if (Op0.getOpcode() != ISD::Constant) - return nullptr; +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(getNumOperands()); + } + SDValue Splatted; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SDValue Op = getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) { + if (UndefElements) + (*UndefElements)[i] = true; + } else if (!Splatted) { + Splatted = Op; + } else if (Splatted != Op) { + return SDValue(); + } + } + + if (!Splatted) { + assert(getOperand(0).getOpcode() == ISD::UNDEF && + "Can only have a splat without a constant for all undefs."); + return getOperand(0); + } - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) - if (getOperand(i) != Op0) - return nullptr; + return Splatted; +} - return cast<ConstantSDNode>(Op0); +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>( + getSplatValue(UndefElements).getNode()); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>( + getSplatValue(UndefElements).getNode()); } bool BuildVectorSDNode::isConstant() const { @@ -6591,10 +6747,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -#ifdef XDEBUG +#ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSet<const SDNode*, 32> &Visited, - SmallPtrSet<const SDNode*, 32> &Checked) { + SmallPtrSet<const SDNode*, 32> &Checked, + const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) return; @@ -6602,29 +6759,37 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. 
if (!Visited.insert(N)) { - dbgs() << "Offending node:\n"; - N->dumprFull(); errs() << "Detected cycle in SelectionDAG\n"; + dbgs() << "Offending node:\n"; + N->dumprFull(DAG); dbgs() << "\n"; abort(); } for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG); Checked.insert(N); Visited.erase(N); } #endif -void llvm::checkForCycles(const llvm::SDNode *N) { +void llvm::checkForCycles(const llvm::SDNode *N, + const llvm::SelectionDAG *DAG, + bool force) { +#ifndef NDEBUG + bool check = force; #ifdef XDEBUG - assert(N && "Checking nonexistent SDNode"); - SmallPtrSet<const SDNode*, 32> visited; - SmallPtrSet<const SDNode*, 32> checked; - checkForCyclesHelper(N, visited, checked); -#endif + check = true; +#endif // XDEBUG + if (check) { + assert(N && "Checking nonexistent SDNode"); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked, DAG); + } +#endif // !NDEBUG } -void llvm::checkForCycles(const llvm::SelectionDAG *DAG) { - checkForCycles(DAG->getRoot().getNode()); +void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { + checkForCycles(DAG->getRoot().getNode(), DAG, force); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 070e929..28d8e98 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -169,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -2784,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) { void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + + bool nuw = false; + bool nsw = false; + bool exact = false; + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, nuw, nsw, exact); + setValue(&I, BinNodeValue); } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { @@ -2816,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + bool nuw = false; + bool nsw = false; + bool exact = false; + + if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { + + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + } + + SDValue Res = DAG.getNode(Opcode, 
getCurSDLoc(), Op1.getValueType(), Op1, Op2, + nuw, nsw, exact); + setValue(&I, Res); } void SelectionDAGBuilder::visitSDiv(const User &I) { @@ -3570,12 +3601,12 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, if (Before) { if (Order == AcquireRelease || Order == SequentiallyConsistent) Order = Release; - else if (Order == Acquire || Order == Monotonic) + else if (Order == Acquire || Order == Monotonic || Order == Unordered) return Chain; } else { if (Order == AcquireRelease) Order = Acquire; - else if (Order == Release || Order == Monotonic) + else if (Order == Release || Order == Monotonic || Order == Unordered) return Chain; } SDValue Ops[3]; @@ -3598,19 +3629,17 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getSimpleValueType(), - InChain, - getValue(I.getPointerOperand()), - getValue(I.getCompareOperand()), - getValue(I.getNewValOperand()), - MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, - TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, - Scope); + MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); + SDValue L = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), + 0 /* Alignment */, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); - SDValue OutChain = L.getValue(1); + SDValue OutChain = L.getValue(2); if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, @@ -5293,7 +5322,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CLI.setDebugLoc(sdl).setChain(getRoot()) .setCallee(CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), - &Args, 0); + std::move(Args), 0); std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -5410,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { + const TargetLowering *TLI = TM.getTargetLowering(); PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5420,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - // Check whether the function can return without sret-demotion. 
- SmallVector<ISD::OutputArg, 4> Outs; - const TargetLowering *TLI = TM.getTargetLowering(); - GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - - bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); - - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; - - if (!CanLowerReturn) { - assert(!CS.hasInAllocaArgument() && - "sret demotion is incompatible with inalloca"); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5499,58 +5490,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, *TLI)) + if (isTailCall && !isInTailCallPosition(CS, DAG)) isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall); + .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) { + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. 
- SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(*TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - - SmallVector<EVT, 4> RetTys; - SmallVector<uint64_t, 4> Offsets; - RetTy = FTy->getReturnType(); - ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); - - unsigned NumValues = RetTys.size(); - SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, Chains); - PendingLoads.push_back(Chain); - - setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(RetTys), Values)); - } if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -6845,7 +6798,7 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs) + .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) .setDiscardResult(!CI.use_empty()); const TargetLowering *TLI = TM.getTargetLowering(); @@ -7092,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are @@ -7100,24 +7068,62 @@ std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. 
CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? + // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); + unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } } } @@ -7260,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { "LowerCall emitted a value with the wrong type!"); }); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. - ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (CLI.RetSExt) - AssertOp = ISD::AssertSext; - else if (CLI.RetZExt) - AssertOp = ISD::AssertZext; SmallVector<SDValue, 4> ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - - ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, nullptr, - AssertOp)); - CurReg += NumRegs; - } - - // For a function returning void, there is no return value. 
We can't create - // such a node, so we just return a null return value in that case. In - // that case, nothing will actually look at the value. - if (ReturnValues.empty()) - return std::make_pair(SDValue(), CLI.Chain); + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + + ComputeValueVTs(*this, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, + false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. + if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index fb29691..84679f9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -320,7 +320,7 @@ private: /// 1. Preserve the architecture independence of stack protector generation. /// /// 2. Preserve the normal IR level stack protector check for platforms like - /// OpenBSD for which we support platform specific stack protector + /// OpenBSD for which we support platform-specific stack protector /// generation. /// /// The main problem that guided the present solution is that one can not @@ -338,7 +338,7 @@ private: /// basic block (where the return inst is placed) and then move it back /// later at SelectionDAG/MI time before the stack protector check if the /// tail call optimization failed. The MI level option was nixed - /// immediately since it would require platform specific pattern + /// immediately since it would require platform-specific pattern /// matching. The SelectionDAG level option was nixed because /// SelectionDAG only processes one IR level basic block at a time /// implying one could not create a DAG Combine to move the callinst. 
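Taken together, the LowerCallTo hunks above relocate sret demotion from the IR-level call visitor into the shared TargetLowering::LowerCallTo, so every caller of that hook, not just SelectionDAGBuilder, gets the same handling. The transformation itself is easy to state outside the DAG. Below is a conceptual, self-contained C++ sketch of what demotion does at the source level (all names here are illustrative, not LLVM APIs): when CanLowerReturn() rejects the return type, the callee is given a hidden leading pointer into a caller-owned stack slot, the DemoteStackSlot, and the caller reloads each component from its offset once the call returns.

#include <cstdint>
#include <cstdio>

// A return type assumed too large for the target's return registers.
struct BigRet { std::uint64_t Parts[4]; };

// What "BigRet callee(uint64_t)" effectively becomes after sret demotion:
// the result is stored through a hidden first pointer argument.
static void calleeDemoted(BigRet *Ret, std::uint64_t Seed) {
  for (unsigned i = 0; i != 4; ++i)
    Ret->Parts[i] = Seed + i;
}

int main() {
  BigRet Slot;              // the caller-created stack slot (DemoteStackSlot)
  calleeDemoted(&Slot, 10); // hidden sret pointer prepended to the arguments
  // The caller then reloads each value from base + offset, mirroring the
  // getLoad() calls over Offsets[i] in the hunk above.
  for (unsigned i = 0; i != 4; ++i)
    std::printf("Parts[%u] = %llu\n", i, (unsigned long long)Slot.Parts[i]);
  return 0;
}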
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d6b5255..b3a452f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -55,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PREFETCH: return "Prefetch"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; @@ -220,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 472fc9c..57e22e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); + +// Intrinsic instructions... 
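// Each counter below is compiled in only when statistics are enabled (note
// the #endif that closes this block) and is printed by passing -stats to llc
// or opt. A minimal sketch of the pattern, with a hypothetical counter name:
//   STATISTIC(NumFastIselFailFoo, "Fast isel fails on foo");
//   ...
//   ++NumFastIselFailFoo;   // bumped at the corresponding failure site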
+STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call"); +STATISTIC(NumFastIselFailSAddWithOverflow, + "Fast isel fails on sadd.with.overflow"); +STATISTIC(NumFastIselFailUAddWithOverflow, + "Fast isel fails on uadd.with.overflow"); +STATISTIC(NumFastIselFailSSubWithOverflow, + "Fast isel fails on ssub.with.overflow"); +STATISTIC(NumFastIselFailUSubWithOverflow, + "Fast isel fails on usub.with.overflow"); +STATISTIC(NumFastIselFailSMulWithOverflow, + "Fast isel fails on smul.with.overflow"); +STATISTIC(NumFastIselFailUMulWithOverflow, + "Fast isel fails on umul.with.overflow"); +STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress"); +STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call"); +STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call"); +STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call"); #endif static cl::opt<bool> @@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) { case Instruction::FCmp: NumFastIselFailFCmp++; return; case Instruction::PHI: NumFastIselFailPHI++; return; case Instruction::Select: NumFastIselFailSelect++; return; - case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Call: { + if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) { + switch (Intrinsic->getIntrinsicID()) { + default: + NumFastIselFailIntrinsicCall++; return; + case Intrinsic::sadd_with_overflow: + NumFastIselFailSAddWithOverflow++; return; + case Intrinsic::uadd_with_overflow: + NumFastIselFailUAddWithOverflow++; return; + case Intrinsic::ssub_with_overflow: + NumFastIselFailSSubWithOverflow++; return; + case Intrinsic::usub_with_overflow: + NumFastIselFailUSubWithOverflow++; return; + case Intrinsic::smul_with_overflow: + NumFastIselFailSMulWithOverflow++; return; + case Intrinsic::umul_with_overflow: + NumFastIselFailUMulWithOverflow++; return; + case Intrinsic::frameaddress: + NumFastIselFailFrameaddress++; return; + case Intrinsic::sqrt: + NumFastIselFailSqrt++; return; + case Intrinsic::experimental_stackmap: + NumFastIselFailStackMap++; return; + case Intrinsic::experimental_patchpoint_void: // fall-through + case Intrinsic::experimental_patchpoint_i64: + NumFastIselFailPatchPoint++; return; + } + } + NumFastIselFailCall++; + return; + } case Instruction::Shl: NumFastIselFailShl++; return; case Instruction::LShr: NumFastIselFailLShr++; return; case Instruction::AShr: NumFastIselFailAShr++; return; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b75d805..42372a2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -105,7 +105,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) .setSExtResult(isSigned).setZExtResult(!isSigned); return LowerCallTo(CLI); @@ -327,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + // Early return, as this function cannot handle vector types. 
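// (ShrinkDemandedOp rewrites a wide scalar operation whose high bits are dead
// as a narrower operation plus a free zero-extend; no analogous per-element
// narrowing is attempted for vectors, hence the early return below.)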
+ if (Op.getValueType().isVector()) + return false; + // Don't do this if the node has another user, which may require the // full value. if (!Op.getNode()->hasOneUse()) @@ -1146,18 +1150,21 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; - bool IsVec = false; const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); if (!CN) { const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); if (!BV) return false; - IsVec = true; - CN = BV->getConstantSplatValue(); + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; } - switch (getBooleanContents(IsVec)) { + switch (getBooleanContents(N->getValueType(0))) { case UndefinedBooleanContent: return CN->getAPIntValue()[0]; case ZeroOrOneBooleanContent: @@ -1173,18 +1180,21 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; - bool IsVec = false; const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); if (!CN) { const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); if (!BV) return false; - IsVec = true; - CN = BV->getConstantSplatValue(); + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; } - if (getBooleanContents(IsVec) == UndefinedBooleanContent) + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) return !CN->getAPIntValue()[0]; return CN->isNullValue(); @@ -1205,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + getBooleanContents(N0->getValueType(0)); return DAG.getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); } @@ -1412,7 +1423,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), NewConst, Cond); - return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); } break; } @@ -1496,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } else if (N1C->getAPIntValue() == 1 && (VT == MVT::i1 || - getBooleanContents(false) == ZeroOrOneBooleanContent)) { + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -1767,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType().isVector())) { + switch (getBooleanContents(N0.getValueType())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: EqVal = ISD::isTrueWhenEqual(Cond); @@ -2613,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. 
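// The multiply below uses the divisor's multiplicative inverse modulo
// 2^BitWidth, which exists once the trailing zeros have been shifted out,
// since every odd value is invertible mod 2^n. Worked i32 example for an
// exact x / 6: ShAmt = 1 leaves d = 3, and 3 * 0xAAAAAAAB = 2^33 + 1, which
// is 1 (mod 2^32), so x / 6 == (x >>s 1) * 0xAAAAAAAB for exact multiples.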
SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, + true); d = d.ashr(ShAmt); } diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index 1120be8..0e89bad 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -15,8 +15,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : DL(TM.getDataLayout()) { +TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL) + : DL(DL) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index 4dd87dd..3ba502f 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -28,10 +28,9 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" namespace llvm { -cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness", - cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass")); cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness", - cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass")); + cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); } STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); @@ -62,15 +61,17 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { /// Calculate the liveness information for the given machine function. bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { + if (!EnablePatchPointLiveness) + return false; + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << _MF.getName() << " **********\n"); MF = &_MF; TRI = MF->getTarget().getRegisterInfo(); ++NumStackMapFuncVisited; - // Skip this function if there are no stackmaps or patchpoints to process. - if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) || - (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) { + // Skip this function if there are no patchpoints to process. + if (!MF->getFrameInfo()->hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } @@ -88,13 +89,10 @@ bool StackMapLiveness::calculateLiveness() { LiveRegs.addLiveOuts(MBBI); bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register - // set to an instruction if we encounter a stackmap or patchpoint - // instruction. + // set to an instruction if we encounter a patchpoint instruction. for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), E = MBBI->rend(); I != E; ++I) { - int Opc = I->getOpcode(); - if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) || - (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) { + if (I->getOpcode() == TargetOpcode::PATCHPOINT) { addLiveOutSetToMI(*I); HasChanged = true; HasStackMap = true; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index c3f84c6..83966bd0 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -671,7 +671,7 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const { // Default implementation of CreateTargetRAHazardRecognizer. 
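// In-order targets usually override this hook to get scoreboard-driven hazard
// detection. A minimal sketch for a hypothetical MyInstrInfo, assuming its
// subtarget exposes itineraries via getInstrItineraryData():
//   ScheduleHazardRecognizer *MyInstrInfo::CreateTargetHazardRecognizer(
//       const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const {
//     return new ScoreboardHazardRecognizer(STI->getInstrItineraryData(),
//                                           DAG, "sched");
//   }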
ScheduleHazardRecognizer *TargetInstrInfo:: -CreateTargetHazardRecognizer(const TargetMachine *TM, +CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { // Dummy hazard recognizer allows all instructions to issue. return new ScheduleHazardRecognizer(); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 2634d71..c574fd4 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -39,7 +39,7 @@ using namespace llvm; /// InitLibcallNames - Set default libcall names. /// -static void InitLibcallNames(const char **Names, const TargetMachine &TM) { +static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SHL_I16] = "__ashlhi3"; Names[RTLIB::SHL_I32] = "__ashlsi3"; Names[RTLIB::SHL_I64] = "__ashldi3"; @@ -384,7 +384,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; - if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + if (TT.getEnvironment() == Triple::GNU) { Names[RTLIB::SINCOS_F32] = "sincosf"; Names[RTLIB::SINCOS_F64] = "sincos"; Names[RTLIB::SINCOS_F80] = "sincosl"; @@ -399,7 +399,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SINCOS_PPCF128] = nullptr; } - if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) { + if (TT.getOS() != Triple::OpenBSD) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; } else { // These are generally not available. @@ -690,6 +690,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; + BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; @@ -702,7 +703,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, SupportJumpTables = true; MinimumJumpTableEntries = 4; - InitLibcallNames(LibcallRoutineNames, TM); + InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); } @@ -730,6 +731,10 @@ void TargetLoweringBase::initActions() { setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); } + // Most backends expect to see the node which just returns the value loaded. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, + (MVT::SimpleValueType)VT, Expand); + // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -739,8 +744,15 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT >= MVT::FIRST_VECTOR_VALUETYPE && - VT <= MVT::LAST_VECTOR_VALUETYPE) + VT <= MVT::LAST_VECTOR_VALUETYPE) { setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + } } // Most targets ignore the @llvm.prefetch intrinsic. 
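Because initActions now marks ATOMIC_CMP_SWAP_WITH_SUCCESS as Expand for every value type, a backend only sees the two-result form if it opts in. A minimal sketch of opting in from a hypothetical target's TargetLowering constructor (illustrative, not code from this patch):

    // Lower the value+success form natively instead of letting the legalizer
    // expand it into ATOMIC_CMP_SWAP plus a comparison of the loaded value.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);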
@@ -1080,24 +1092,25 @@ void TargetLoweringBase::computeRegisterProperties() { // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; + MVT VT = (MVT::SimpleValueType) i; + if (isTypeLegal(VT)) + continue; - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorType(VT)) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + bool IsLegalWiderType = false; + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); + switch (PreferredAction) { + case TypePromoteInteger: { + // Try to promote the elements of integer vectors. If no legal + // promotion was found, fall through to the widen-vector method. + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT) + && SVT.getScalarType().isInteger()) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1106,15 +1119,15 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - - if (IsLegalWiderType) continue; - + if (IsLegalWiderType) + break; + } + case TypeWidenVector: { // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1123,27 +1136,34 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - if (IsLegalWiderType) continue; + if (IsLegalWiderType) + break; } - - MVT IntermediateVT; - MVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - MVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? 
TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); + case TypeSplitVector: + case TypeScalarizeVector: { + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + NumIntermediates, RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + if (PreferredAction == TypeScalarizeVector) + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); + else + ValueTypeActions.setTypeAction(VT, TypeSplitVector); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + } + break; + } + default: + llvm_unreachable("Unknown vector legalization action!"); } } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index dda2259..03f4a51 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -48,16 +48,12 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); - switch (Encoding & 0x70) { - default: - report_fatal_error("We do not support this DWARF encoding yet!"); - case dwarf::DW_EH_PE_absptr: - return TM.getSymbol(GV, Mang); - case dwarf::DW_EH_PE_pcrel: { + if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + TM.getSymbol(GV, Mang)->getName()); - } - } + if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) + return TM.getSymbol(GV, Mang); + report_fatal_error("We do not support this DWARF encoding yet!"); } void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, @@ -196,6 +192,18 @@ getELFSectionFlags(SectionKind K) { return Flags; } +static const Comdat *getELFComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return nullptr; + + if (C->getSelectionKind() != Comdat::Any) + report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" + + C->getName() + "' cannot be lowered."); + + return C; +} + const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -204,14 +212,20 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); + StringRef Group = ""; + unsigned Flags = getELFSectionFlags(Kind); + if (const Comdat *C = getELFComdat(GV)) { + Group = C->getName(); + Flags |= ELF::SHF_GROUP; + } return getContext().getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind); + getELFSectionType(SectionName, Kind), Flags, + Kind, /*EntrySize=*/0, Group); } /// getSectionPrefixForGlobal - Return the section prefix name used by options /// FunctionsSections and DataSections. 
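/// For example, with -ffunction-sections a function foo() is emitted into
/// ".text.foo" and, with -fdata-sections, a global bar into ".data.bar", so
/// the linker can drop or reorder each unit independently (e.g. with
/// --gc-sections).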
-static const char *getSectionPrefixForGlobal(SectionKind Kind) { +static StringRef getSectionPrefixForGlobal(SectionKind Kind) { if (Kind.isText()) return ".text."; if (Kind.isReadOnly()) return ".rodata."; if (Kind.isBSS()) return ".bss."; @@ -228,7 +242,6 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro."; } - const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -242,18 +255,20 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. - if ((GV->isWeakForLinker() || EmitUniquedSection) && + if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && !Kind.isCommon()) { - const char *Prefix; - Prefix = getSectionPrefixForGlobal(Kind); + StringRef Prefix = getSectionPrefixForGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + SmallString<128> Name(Prefix); TM.getNameWithPrefix(Name, GV, Mang, true); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); - if (GV->isWeakForLinker()) { - Group = Name.substr(strlen(Prefix)); + if (GV->isWeakForLinker() || GV->hasComdat()) { + if (const Comdat *C = getELFComdat(GV)) + Group = C->getName(); + else + Group = Name.substr(Prefix.size()); Flags |= ELF::SHF_GROUP; } @@ -340,7 +355,7 @@ getSectionForConstant(SectionKind Kind) const { } const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { + unsigned Priority, const MCSymbol *KeySym) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -360,7 +375,7 @@ const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( } const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { + unsigned Priority, const MCSymbol *KeySym) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -487,6 +502,15 @@ emitModuleFlags(MCStreamer &Streamer, Streamer.AddBlankLine(); } +static void checkMachOComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return; + + report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() + + "' cannot be lowered."); +} + const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -494,6 +518,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; + + checkMachOComdat(GV); + std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, TAAParsed, StubSize); @@ -564,6 +591,7 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { + checkMachOComdat(GV); // Handle thread local data. 
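// On Mach-O, thread-local globals live in dedicated __DATA sections:
// zero-initialized values in __thread_bss, initialized ones in __thread_data,
// and their descriptors in __thread_vars, which is what the TLS*Section
// members used below refer to.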
if (Kind.isThreadBSS()) return TLSBSSSection; @@ -732,6 +760,50 @@ getCOFFSectionFlags(SectionKind K) { return Flags; } +static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + assert(C && "expected GV to have a Comdat!"); + + StringRef ComdatGVName = C->getName(); + const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName); + if (!ComdatGV) + report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + + "' does not exist."); + + if (ComdatGV->getComdat() != C) + report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + + "' is not a key for its COMDAT."); + + return ComdatGV; +} + +static int getSelectionForCOFF(const GlobalValue *GV) { + if (const Comdat *C = GV->getComdat()) { + const GlobalValue *ComdatKey = getComdatGVForCOFF(GV); + if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey)) + ComdatKey = GA->getBaseObject(); + if (ComdatKey == GV) { + switch (C->getSelectionKind()) { + case Comdat::Any: + return COFF::IMAGE_COMDAT_SELECT_ANY; + case Comdat::ExactMatch: + return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH; + case Comdat::Largest: + return COFF::IMAGE_COMDAT_SELECT_LARGEST; + case Comdat::NoDuplicates: + return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; + case Comdat::SameSize: + return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE; + } + } else { + return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE; + } + } else if (GV->isWeakForLinker()) { + return COFF::IMAGE_COMDAT_SELECT_ANY; + } + return 0; +} + const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -739,11 +811,21 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( unsigned Characteristics = getCOFFSectionFlags(Kind); StringRef Name = GV->getSection(); StringRef COMDATSymName = ""; - if (GV->isWeakForLinker()) { - Selection = COFF::IMAGE_COMDAT_SELECT_ANY; - Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = TM.getSymbol(GV, Mang); - COMDATSymName = Sym->getName(); + if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) { + Selection = getSelectionForCOFF(GV); + const GlobalValue *ComdatGV; + if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + ComdatGV = getComdatGVForCOFF(GV); + else + ComdatGV = GV; + + if (!ComdatGV->hasPrivateLinkage()) { + MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + COMDATSymName = Sym->getName(); + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + } else { + Selection = 0; + } } return getContext().getCOFFSection(Name, Characteristics, @@ -780,17 +862,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // into a 'uniqued' section name, create and return the section now. // Section names depend on the name of the symbol which is not feasible if the // symbol has private linkage. - if ((GV->isWeakForLinker() || EmitUniquedSection) && - !GV->hasPrivateLinkage() && !Kind.isCommon()) { + if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && + !Kind.isCommon()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = TM.getSymbol(GV, Mang); - return getContext().getCOFFSection( - Name, Characteristics, Kind, Sym->getName(), - GV->isWeakForLinker() ? 
COFF::IMAGE_COMDAT_SELECT_ANY - : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES); + int Selection = getSelectionForCOFF(GV); + if (!Selection) + Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; + const GlobalValue *ComdatGV; + if (GV->hasComdat()) + ComdatGV = getComdatGVForCOFF(GV); + else + ComdatGV = GV; + + if (!ComdatGV->hasPrivateLinkage()) { + MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + StringRef COMDATSymName = Sym->getName(); + return getContext().getCOFFSection(Name, Characteristics, Kind, + COMDATSymName, Selection); + } } if (Kind.isText()) @@ -868,8 +960,7 @@ emitModuleFlags(MCStreamer &Streamer, static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, const MCSection *Sec, - const MCSymbol *KeySym, - const MCSection *KeySec) { + const MCSymbol *KeySym) { // Return the normal section if we don't have to be associative. if (!KeySym) return Sec; @@ -877,20 +968,19 @@ static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, // Make an associative section with the same name and kind as the normal // section. const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec); - const MCSectionCOFF *KeySecCOFF = cast<MCSectionCOFF>(KeySec); unsigned Characteristics = SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT; return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics, SecCOFF->getKind(), KeySym->getName(), - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF); + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); } const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { - return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec); + unsigned Priority, const MCSymbol *KeySym) const { + return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym); } const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { - return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec); + unsigned Priority, const MCSymbol *KeySym) const { + return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym); } |
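All three object-file flavors above key their COMDAT handling off the IR-level Comdat attached to a global. A minimal sketch of how such a comdat is created and attached, assuming an existing Module M and GlobalObject GV (illustrative use of the Comdat API, not code from this patch):

    Comdat *C = M.getOrInsertComdat("inline_fn");
    C->setSelectionKind(Comdat::Any); // the only kind the ELF path accepts
    GV->setComdat(C);                 // GV now participates in group "inline_fn"

With that in place, the ELF path emits GV into an SHF_GROUP section named after the comdat, COFF maps the selection kind onto IMAGE_COMDAT_SELECT_*, and Mach-O reports a fatal error, as it has no COMDAT concept.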