Diffstat (limited to 'lib/Target/NVPTX')
31 files changed, 480 insertions, 678 deletions
diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt
index bc8d82e..6ea244a 100644
--- a/lib/Target/NVPTX/LLVMBuild.txt
+++ b/lib/Target/NVPTX/LLVMBuild.txt
@@ -28,5 +28,5 @@ has_asmprinter = 1
 type = Library
 name = NVPTXCodeGen
 parent = NVPTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils
 add_to_library_groups = NVPTX
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 4fd5bdd..11d737e 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -50,5 +50,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
   AscizDirective = " .b8";

   // @TODO: Can we just disable this?
+  WeakDirective = "\t// .weak\t";
   GlobalDirective = "\t// .globl\t";
 }
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 13ba57e..382525d 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -59,9 +59,8 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
   llvm_unreachable("Unknown condition code");
 }

-ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
-FunctionPass *
-createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
+                                 llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
 ModulePass *createGenericToNVVMPass();
 FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 69fc86e..c343980 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H

 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Pass.h"

@@ -32,8 +33,8 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DataLayoutPass>();
-    AU.addPreserved("stack-protector");
     AU.addPreserved<MachineFunctionAnalysis>();
+    AU.addPreserved<StackProtector>();
   }

   const char *getPassName() const override {
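Note: the NVPTXAllocaHoisting change above replaces string-based preserved-pass registration with the typed form, enabled by the newly included llvm/CodeGen/StackProtector.h. A rough illustration of the idiom follows; the pass below is hypothetical and only sketches the pattern, it is not part of this patch:

    #include "llvm/CodeGen/StackProtector.h"
    #include "llvm/Pass.h"

    namespace {
    // Hypothetical pass, shown only to demonstrate typed addPreserved<>.
    struct ExamplePass : llvm::FunctionPass {
      static char ID;
      ExamplePass() : llvm::FunctionPass(ID) {}
      void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
        // Resolved through StackProtector::ID at compile time; the removed
        // addPreserved("stack-protector") form did a runtime name lookup.
        AU.addPreserved<llvm::StackProtector>();
      }
      bool runOnFunction(llvm::Function &) override { return false; }
    };
    }
    char ExamplePass::ID = 0;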
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 35ba4f1..833db04 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -17,8 +17,8 @@
 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
 #include "NVPTX.h"
 #include "NVPTXInstrInfo.h"
-#include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXMCExpr.h"
+#include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXRegisterInfo.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXUtilities.h"
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/DebugInfo.h"
@@ -45,6 +46,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TimeValue.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <sstream>

 using namespace llvm;
@@ -108,160 +110,6 @@ void VisitGlobalVariableForEmission(
   }
 }

-// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
-// cannot just link to the existing version.
-/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
-///
-using namespace nvptx;
-const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
-  MCContext &Ctx = AP.OutContext;
-
-  if (CV->isNullValue() || isa<UndefValue>(CV))
-    return MCConstantExpr::Create(0, Ctx);
-
-  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
-    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
-
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
-    return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
-
-  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
-    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
-
-  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
-  if (!CE)
-    llvm_unreachable("Unknown constant value to lower!");
-
-  switch (CE->getOpcode()) {
-  default:
-    // If the code isn't optimized, there may be outstanding folding
-    // opportunities. Attempt to fold the expression using DataLayout as a
-    // last resort before giving up.
-    if (Constant *C = ConstantFoldConstantExpression(
-            CE, AP.TM.getSubtargetImpl()->getDataLayout()))
-      if (C != CE)
-        return LowerConstant(C, AP);
-
-    // Otherwise report the problem to the user.
-    {
-      std::string S;
-      raw_string_ostream OS(S);
-      OS << "Unsupported expression in static initializer: ";
-      CE->printAsOperand(OS, /*PrintType=*/ false,
-                         !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
-      report_fatal_error(OS.str());
-    }
-  case Instruction::AddrSpaceCast: {
-    // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
-    // handled by the generic() logic in the MCExpr printer
-    PointerType *DstTy = cast<PointerType>(CE->getType());
-    PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType());
-    if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
-      return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
-    }
-    std::string S;
-    raw_string_ostream OS(S);
-    OS << "Unsupported expression in static initializer: ";
-    CE->printAsOperand(OS, /*PrintType=*/ false,
-                       !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
-    report_fatal_error(OS.str());
-  }
-  case Instruction::GetElementPtr: {
-    const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
-    // Generate a symbolic expression for the byte address
-    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
-    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
-
-    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
-    if (!OffsetAI)
-      return Base;
-
-    int64_t Offset = OffsetAI.getSExtValue();
-    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
-                                   Ctx);
-  }
-
-  case Instruction::Trunc:
-    // We emit the value and depend on the assembler to truncate the generated
-    // expression properly. This is important for differences between
-    // blockaddress labels. Since the two labels are in the same function, it
-    // is reasonable to treat their delta as a 32-bit value.
-    // FALL THROUGH.
-  case Instruction::BitCast:
-    return LowerConstant(CE->getOperand(0), AP);
-
-  case Instruction::IntToPtr: {
-    const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
-    // Handle casts to pointers by changing them into casts to the appropriate
-    // integer type. This promotes constant folding and simplifies this code.
-    Constant *Op = CE->getOperand(0);
-    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
-                                      false /*ZExt*/);
-    return LowerConstant(Op, AP);
-  }
-
-  case Instruction::PtrToInt: {
-    const DataLayout &TD = *AP.TM.getSubtargetImpl()->getDataLayout();
-    // Support only foldable casts to/from pointers that can be eliminated by
-    // changing the pointer to the appropriately sized integer type.
-    Constant *Op = CE->getOperand(0);
-    Type *Ty = CE->getType();
-
-    const MCExpr *OpExpr = LowerConstant(Op, AP);
-
-    // We can emit the pointer value into this slot if the slot is an
-    // integer slot equal to the size of the pointer.
-    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
-      return OpExpr;
-
-    // Otherwise the pointer is smaller than the resultant integer, mask off
-    // the high bits so we are sure to get a proper truncation if the input is
-    // a constant expr.
-    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
-    const MCExpr *MaskExpr =
-        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
-    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
-  }
-
-  // The MC library also has a right-shift operator, but it isn't consistently
-  // signed or unsigned between different targets.
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::Shl:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor: {
-    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
-    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
-    switch (CE->getOpcode()) {
-    default:
-      llvm_unreachable("Unknown binary operator constant cast expr");
-    case Instruction::Add:
-      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
-    case Instruction::Sub:
-      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
-    case Instruction::Mul:
-      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
-    case Instruction::SDiv:
-      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
-    case Instruction::SRem:
-      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
-    case Instruction::Shl:
-      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
-    case Instruction::And:
-      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
-    case Instruction::Or:
-      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
-    case Instruction::Xor:
-      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
-    }
-  }
-  }
-}
-
 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
   if (!EmitLineNumbers)
     return;
@@ -316,7 +164,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
     emitLineNumberAsDotLoc(*MI);

   MCInst Inst;
@@ -389,8 +237,6 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
   OutMI.setOpcode(MI->getOpcode());
-  const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
-
   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
     const MachineOperand &MO = MI->getOperand(0);
@@ -403,7 +249,7 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
     const MachineOperand &MO = MI->getOperand(i);

     MCOperand MCOp;
-    if (!ST.hasImageHandles()) {
+    if (!nvptxSubtarget->hasImageHandles()) {
       if (lowerImageHandleOperand(MI, i, MCOp)) {
         OutMI.addOperand(MCOp);
         continue;
@@ -500,12 +346,12 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
 }

 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+  const DataLayout *TD = TM.getDataLayout();
+  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();

   Type *Ty = F->getReturnType();

-  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);

   if (Ty->getTypeID() == Type::VoidTyID)
     return;
@@ -528,17 +374,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
   } else if (isa<PointerType>(Ty)) {
     O << ".param .b" << TLI->getPointerTy().getSizeInBits() << " func_retval0";
-  } else {
-    if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
-      unsigned totalsz = TD->getTypeAllocSize(Ty);
-      unsigned retAlignment = 0;
-      if (!llvm::getAlign(*F, 0, retAlignment))
-        retAlignment = TD->getABITypeAlignment(Ty);
-      O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
-        << "]";
-    } else
-      assert(false && "Unknown return type");
-  }
+  } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
+    unsigned totalsz = TD->getTypeAllocSize(Ty);
+    unsigned retAlignment = 0;
+    if (!llvm::getAlign(*F, 0, retAlignment))
+      retAlignment = TD->getABITypeAlignment(Ty);
+    O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+      << "]";
+  } else
+    llvm_unreachable("Unknown return type");
 } else {
   SmallVector<EVT, 16> vtparts;
   ComputeValueVTs(*TLI, Ty, vtparts);
@@ -574,6 +418,42 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
   printReturnValStr(F, O);
 }

+// Return true if MBB is the header of a loop marked with
+// llvm.loop.unroll.disable.
+// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
+bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
+    const MachineBasicBlock &MBB) const {
+  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+  // TODO: isLoopHeader() should take "const MachineBasicBlock *".
+  // We insert .pragma "nounroll" only to the loop header.
+  if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+    return false;
+
+  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
+  // we iterate through each back edge of the loop with header MBB, and check
+  // whether its metadata contains llvm.loop.unroll.disable.
+  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
+    const MachineBasicBlock *PMBB = *I;
+    if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
+      // Edges from other loops to MBB are not back edges.
+      continue;
+    }
+    if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
+      if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
+        if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+  AsmPrinter::EmitBasicBlockStart(MBB);
+  if (isLoopHeaderOfNoUnroll(MBB))
+    OutStreamer.EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+}
+
 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
@@ -624,14 +504,13 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
   unsigned RegNo = MI->getOperand(0).getReg();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+  const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo();
   if (TRI->isVirtualRegister(RegNo)) {
     OutStreamer.AddComment(Twine("implicit-def: ") +
                            getVirtualRegisterName(RegNo));
   } else {
-    OutStreamer.AddComment(
-        Twine("implicit-def: ") +
-        TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
+    OutStreamer.AddComment(Twine("implicit-def: ") +
+                           nvptxSubtarget->getRegisterInfo()->getName(RegNo));
   }
   OutStreamer.AddBlankLine();
 }
@@ -793,11 +672,6 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
     return false;
   }

-  if (const MDNode *md = dyn_cast<MDNode>(U))
-    if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
-                          (md->getName().str() == "llvm.dbg.sp")))
-      return true;
-
   for (const User *UU : U->users())
     if (usedInOneFunc(UU, oneFunc) == false)
       return false;
@@ -938,6 +812,14 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
 }

 bool NVPTXAsmPrinter::doInitialization(Module &M) {
+  // Construct a default subtarget off of the TargetMachine defaults. The
+  // rest of NVPTX isn't friendly to change subtargets per function and
+  // so the default TargetMachine will have all of the options.
+  StringRef TT = TM.getTargetTriple();
+  StringRef CPU = TM.getTargetCPU();
+  StringRef FS = TM.getTargetFeatureString();
+  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+  const NVPTXSubtarget STI(TT, CPU, FS, NTM);

   SmallString<128> Str1;
   raw_svector_ostream OS1(Str1);
@@ -952,10 +834,10 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
       .Initialize(OutContext, TM);

-  Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+  Mang = new Mangler(TM.getDataLayout());

   // Emit header before any dwarf directives are emitted below.
-  emitHeader(M, OS1);
+  emitHeader(M, OS1, STI);
   OutStreamer.EmitRawText(OS1.str());

   // Already commented out
@@ -971,7 +853,8 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
     OutStreamer.AddBlankLine();
   }

-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+  // If we're not NVCL we're CUDA, go ahead and emit filenames.
+  if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
     recordAndEmitFilenames(M);

   GlobalsEmitted = false;
@@ -1012,22 +895,24 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
   OutStreamer.EmitRawText(OS2.str());
 }

-void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
+                                 const NVPTXSubtarget &STI) {
   O << "//\n";
   O << "// Generated by LLVM NVPTX Back-End\n";
   O << "//\n";
   O << "\n";

-  unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
+  unsigned PTXVersion = STI.getPTXVersion();
   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";

   O << ".target ";
-  O << nvptxSubtarget.getTargetName();
+  O << STI.getTargetName();

-  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+  if (NTM.getDrvInterface() == NVPTX::NVCL)
     O << ", texmode_independent";
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
-    if (!nvptxSubtarget.hasDouble())
+  else {
+    if (!STI.hasDouble())
       O << ", map_f64_to_f32";
   }

@@ -1037,7 +922,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
   O << "\n";

   O << ".address_size ";
-  if (nvptxSubtarget.is64Bit())
+  if (NTM.is64Bit())
     O << "64";
   else
     O << "32";
@@ -1047,7 +932,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
 }

 bool NVPTXAsmPrinter::doFinalization(Module &M) {
-
   // If we did not emit any functions, then the global declarations have not
   // yet been emitted.
   if (!GlobalsEmitted) {
@@ -1109,7 +993,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {

 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
                                            raw_ostream &O) {
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
     if (V->hasExternalLinkage()) {
       if (isa<GlobalVariable>(V)) {
         const GlobalVariable *GVar = cast<GlobalVariable>(V);
@@ -1153,7 +1037,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       GVar->getName().startswith("nvvm."))
     return;

-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();

   // GlobalVariables are always constant pointers themselves.
   const PointerType *PTy = GVar->getType();
@@ -1287,7 +1171,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
   else
     O << " .align " << GVar->getAlignment();

-  if (ETy->isSingleValueType()) {
+  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
     O << " .";
     // Special case: ABI requires that we use .u8 for predicates
     if (ETy->isIntegerTy(1))
@@ -1341,7 +1225,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
         AggBuffer aggBuffer(ElementSize, O, *this);
         bufferAggregateConstant(Initializer, &aggBuffer);
         if (aggBuffer.numSymbols) {
-          if (nvptxSubtarget.is64Bit()) {
+          if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
             O << " .u64 " << *getSymbol(GVar) << "[";
             O << ElementSize / 8;
           } else {
@@ -1439,7 +1323,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
   case Type::DoubleTyID:
     return "f64";
   case Type::PointerTyID:
-    if (nvptxSubtarget.is64Bit())
+    if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
       if (useB4PTR)
         return "b64";
       else
@@ -1456,7 +1340,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {

 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
                                             raw_ostream &O) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();

   // GlobalVariables are always constant pointers themselves.
   const PointerType *PTy = GVar->getType();
@@ -1469,7 +1353,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
   else
     O << " .align " << GVar->getAlignment();

-  if (ETy->isSingleValueType()) {
+  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
     O << " .";
     O << getPTXFundamentalTypeStr(ETy);
     O << " ";
@@ -1508,17 +1392,6 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
   if (ATy)
     return getOpenCLAlignment(TD, ATy->getElementType());

-  const VectorType *VTy = dyn_cast<VectorType>(Ty);
-  if (VTy) {
-    Type *ETy = VTy->getElementType();
-    unsigned int numE = VTy->getNumElements();
-    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
-    if (numE == 3)
-      return 4 * alignE;
-    else
-      return numE * alignE;
-  }
-
   const StructType *STy = dyn_cast<StructType>(Ty);
   if (STy) {
     unsigned int alignStruct = 1;
@@ -1541,50 +1414,22 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
                                      int paramIndex, raw_ostream &O) {
-  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
-      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
-    O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
-  else {
-    std::string argName = I->getName();
-    const char *p = argName.c_str();
-    while (*p) {
-      if (*p == '.')
-        O << "_";
-      else
-        O << *p;
-      p++;
-    }
-  }
+  O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
 }

 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
-  Function::const_arg_iterator I, E;
-  int i = 0;
-
-  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
-      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
-    O << *CurrentFnSym << "_param_" << paramIndex;
-    return;
-  }
-
-  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
-    if (i == paramIndex) {
-      printParamName(I, paramIndex, O);
-      return;
-    }
-  }
-  llvm_unreachable("paramIndex out of bound");
+  O << *CurrentFnSym << "_param_" << paramIndex;
 }

 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
   const AttributeSet &PAL = F->getAttributes();
-  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
   Function::const_arg_iterator I, E;
   unsigned paramIndex = 0;
   bool first = true;
   bool isKernelFunc = llvm::isKernelFunction(*F);
-  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
   MVT thePointerTy = TLI->getPointerTy();

   O << "(\n";
@@ -1603,21 +1448,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
       if (isImage(*I)) {
         std::string sname = I->getName();
         if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
-          if (nvptxSubtarget.hasImageHandles())
+          if (nvptxSubtarget->hasImageHandles())
             O << "\t.param .u64 .ptr .surfref ";
           else
             O << "\t.param .surfref ";
           O << *CurrentFnSym << "_param_" << paramIndex;
         } else { // Default image is read_only
-          if (nvptxSubtarget.hasImageHandles())
+          if (nvptxSubtarget->hasImageHandles())
             O << "\t.param .u64 .ptr .texref ";
           else
             O << "\t.param .texref ";
           O << *CurrentFnSym << "_param_" << paramIndex;
         }
       } else {
-        if (nvptxSubtarget.hasImageHandles())
+        if (nvptxSubtarget->hasImageHandles())
           O << "\t.param .u64 .ptr .samplerref ";
         else
           O << "\t.param .samplerref ";
@@ -1650,7 +1495,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
           // Special handling for pointer arguments to kernel
           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";

-          if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
+              NVPTX::CUDA) {
             Type *ETy = PTy->getElementType();
             int addrSpace = PTy->getAddressSpace();
             switch (addrSpace) {
@@ -1779,7 +1625,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
   if (NumBytes) {
     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
      << getFunctionNumber() << "[" << NumBytes << "];\n";
-    if (nvptxSubtarget.is64Bit()) {
+    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
      O << "\t.reg .b64 \t%SP;\n";
      O << "\t.reg .b64 \t%SPL;\n";
    } else {
@@ -1900,7 +1746,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
     }
     return;
   } else {
-    O << *LowerConstant(CPV, *this);
+    O << *lowerConstant(CPV);
     return;
   }
 }
@@ -1910,7 +1756,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {

 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
                                    AggBuffer *aggBuffer) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();

   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
     int s = TD->getTypeAllocSize(CPV->getType());
@@ -2034,7 +1880,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,

 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
                                               AggBuffer *aggBuffer) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
   int Bytes;

   // Old constants
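Note: the new isLoopHeaderOfNoUnroll()/EmitBasicBlockStart() pair above is what turns the IR-level unroll hint into the PTX ".pragma \"nounroll\"" directive. A minimal sketch of the underlying metadata query, using GetUnrollMetadata() from the newly included llvm/Transforms/Utils/UnrollLoop.h (the helper name here is ours, not the patch's):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/Transforms/Utils/UnrollLoop.h"

    // True if the back edge leaving this latch block carries the
    // llvm.loop.unroll.disable hint -- the same test the printer performs
    // on each back-edge predecessor of a loop header.
    static bool hasNoUnrollHint(const llvm::BasicBlock *Latch) {
      if (llvm::MDNode *LoopID =
              Latch->getTerminator()->getMetadata("llvm.loop"))
        return llvm::GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable") !=
               nullptr;
      return false;
    }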
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 83fa5d3..7e6b5e8 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -39,13 +39,6 @@
 // A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
 // (subclass of MCStreamer).

-// This is defined in AsmPrinter.cpp.
-// Used to process the constant expressions in initializers.
-namespace nvptx {
-const llvm::MCExpr *
-LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
-}
-
 namespace llvm {

 class LineReader {
@@ -145,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
       unsigned int nSym = 0;
       unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
       unsigned int nBytes = 4;
-      if (AP.nvptxSubtarget.is64Bit())
+      if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
         nBytes = 8;
       for (pos = 0; pos < size; pos += nBytes) {
         if (pos)
@@ -167,7 +160,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
             O << *Name;
           }
         } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
-          O << *nvptx::LowerConstant(Cexpr, AP);
+          O << *AP.lowerConstant(Cexpr);
         } else
           llvm_unreachable("symbol type unknown");
         nSym++;
@@ -194,6 +187,7 @@ private:
   const Function *F;
   std::string CurrentFnName;

+  void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
   void EmitFunctionEntryLabel() override;
   void EmitFunctionBodyStart() override;
   void EmitFunctionBodyEnd() override;
@@ -218,7 +212,7 @@ private:
   void printParamName(Function::const_arg_iterator I, int paramIndex,
                       raw_ostream &O);
   void emitGlobals(const Module &M);
-  void emitHeader(Module &M, raw_ostream &O);
+  void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
   void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
   void emitVirtualRegister(unsigned int vr, raw_ostream &);
   void emitFunctionExternParamList(const MachineFunction &MF);
@@ -254,8 +248,10 @@ private:
   typedef DenseMap<unsigned, unsigned> VRegMap;
   typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
   VRegRCMap VRegMapping;
-  // cache the subtarget here.
-  const NVPTXSubtarget &nvptxSubtarget;
+
+  // Cache the subtarget here.
+  const NVPTXSubtarget *nvptxSubtarget;
+
   // Build the map between type name and ID based on module's type
   // symbol table.
   std::map<const Type *, std::string> TypeNameMap;
@@ -288,6 +284,8 @@ private:
                                 MCOperand &MCOp);
   void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);

+  bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
+
   LineReader *reader;
   LineReader *getReader(std::string);
@@ -305,12 +303,12 @@ private:
   bool EmitGeneric;

 public:
-  NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer),
-        nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+  NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)),
+        EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+                    NVPTX::CUDA) {
     CurrentBankselLabelInBasicBlock = "";
     reader = nullptr;
-    EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
   }

   ~NVPTXAsmPrinter() {
@@ -318,6 +316,15 @@ public:
     delete reader;
   }

+  bool runOnMachineFunction(MachineFunction &F) override {
+    nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
+    return AsmPrinter::runOnMachineFunction(F);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineLoopInfo>();
+    AsmPrinter::getAnalysisUsage(AU);
+  }
+
   bool ignoreLoc(const MachineInstr &);

   std::string getVirtualRegisterName(unsigned) const;
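Worth calling out from the two NVPTXAsmPrinter files above: the printer no longer captures an NVPTXSubtarget& at construction time. Because the TargetMachine can hand back a different subtarget for each function (function attributes may differ), the cached value becomes a pointer refreshed on every runOnMachineFunction() call. Condensed restatement of the idiom (member names as in the patch; class context omitted):

    // Inside NVPTXAsmPrinter (condensed): refresh the cached subtarget for
    // every MachineFunction rather than binding one in the constructor.
    bool runOnMachineFunction(llvm::MachineFunction &F) override {
      nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>(); // per-function
      return AsmPrinter::runOnMachineFunction(F);         // then emit as usual
    }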
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 962b123..7d4be8e 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -19,8 +19,8 @@
 #include "NVPTX.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 314df38..34d3a66 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,9 +26,8 @@
 using namespace llvm;

-NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
-      is64bit(STI.is64Bit()) {}
+NVPTXFrameLowering::NVPTXFrameLowering()
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {}

 bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }

@@ -45,7 +44,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
     // mov %SPL, %depot;
     // cvta.local %SP, %SPL;

-    if (is64bit) {
+    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
       unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
       MachineInstr *MI =
           BuildMI(MBB, MBBI, dl,
                   MF.getSubtarget().getInstrInfo()->get(
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0846b78..d1e0a5c 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -19,18 +19,16 @@ namespace llvm {
 class NVPTXSubtarget;

 class NVPTXFrameLowering : public TargetFrameLowering {
-  bool is64bit;
-
 public:
-  explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
+  explicit NVPTXFrameLowering();

   bool hasFP(const MachineFunction &MF) const override;
   void emitPrologue(MachineFunction &MF) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;

-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I) const override;
+  void
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
 };

 } // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 58fa95b..86d134b 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -22,10 +22,11 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueMap.h"
-#include "llvm/PassManager.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"

 using namespace llvm;

@@ -54,8 +55,7 @@ private:
                              IRBuilder<> &Builder);
   Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
                            IRBuilder<> &Builder);
-  void remapNamedMDNode(Module *M, NamedMDNode *N);
-  MDNode *remapMDNode(Module *M, MDNode *N);
+  void remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N);

   typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
   typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
@@ -125,12 +125,17 @@ bool GenericToNVVM::runOnModule(Module &M) {
     ConstantToValueMap.clear();
   }

+  // Copy GVMap over to a standard value map.
+  ValueToValueMapTy VM;
+  for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I)
+    VM[I->first] = I->second;
+
   // Walk through the metadata section and update the debug information
   // associated with the global variables in the default address space.
   for (Module::named_metadata_iterator I = M.named_metadata_begin(),
                                        E = M.named_metadata_end();
        I != E; I++) {
-    remapNamedMDNode(&M, I);
+    remapNamedMDNode(VM, I);
   }

   // Walk through the global variable initializers, and replace any use of
@@ -362,7 +367,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
   }
 }

-void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+void GenericToNVVM::remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N) {

   bool OperandChanged = false;
   SmallVector<MDNode *, 16> NewOperands;
@@ -372,7 +377,7 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
   // converted to another value.
   for (unsigned i = 0; i < NumOperands; ++i) {
     MDNode *Operand = N->getOperand(i);
-    MDNode *NewOperand = remapMDNode(M, Operand);
+    MDNode *NewOperand = MapMetadata(Operand, VM);
     OperandChanged |= Operand != NewOperand;
     NewOperands.push_back(NewOperand);
   }
@@ -390,47 +395,3 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
     N->addOperand(*I);
   }
 }
-
-MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
-
-  bool OperandChanged = false;
-  SmallVector<Value *, 8> NewOperands;
-  unsigned NumOperands = N->getNumOperands();
-
-  // Check if any operand is or contains a global variable in GVMap, and thus
-  // converted to another value.
-  for (unsigned i = 0; i < NumOperands; ++i) {
-    Value *Operand = N->getOperand(i);
-    Value *NewOperand = Operand;
-    if (Operand) {
-      if (isa<GlobalVariable>(Operand)) {
-        GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
-        if (I != GVMap.end()) {
-          NewOperand = I->second;
-          if (++i < NumOperands) {
-            NewOperands.push_back(NewOperand);
-            // Address space of the global variable follows the global variable
-            // in the global variable debug info (see createGlobalVariable in
-            // lib/Analysis/DIBuilder.cpp).
-            NewOperand =
-                ConstantInt::get(Type::getInt32Ty(M->getContext()),
-                                 I->second->getType()->getAddressSpace());
-          }
-        }
-      } else if (isa<MDNode>(Operand)) {
-        NewOperand = remapMDNode(M, cast<MDNode>(Operand));
-      }
-    }
-    OperandChanged |= Operand != NewOperand;
-    NewOperands.push_back(NewOperand);
-  }
-
-  // If none of the operands has been modified, return N as it is.
-  if (!OperandChanged) {
-    return N;
-  }
-
-  // If any of the operands has been modified, create a new MDNode with the new
-  // operands.
-  return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
-}
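The GenericToNVVM change above deletes a hand-rolled metadata walker in favor of the generic mapper from llvm/Transforms/Utils/ValueMapper.h. The pattern, reduced to its core (GVMap/VM names as in the patch; surrounding pass code omitted):

    #include "llvm/IR/Metadata.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"

    // Seed the mapper with every converted global (VM[OldGV] = NewGV as the
    // pass does above), then let MapMetadata rewrite any MDNode that
    // transitively references one of them.
    llvm::MDNode *remapDebugNode(llvm::MDNode *N, llvm::ValueToValueMapTy &VM) {
      // MapMetadata returns N itself when no operand needed remapping, so
      // callers can detect "no change" by pointer comparison.
      return llvm::MapMetadata(N, VM);
    }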
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index cd0422d..e01c780 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -50,11 +50,15 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,

 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                      CodeGenOpt::Level OptLevel)
-    : SelectionDAGISel(tm, OptLevel),
-      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
+    : SelectionDAGISel(tm, OptLevel), TM(tm) {
   doMulWide = (OptLevel > 0);
 }

+bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+  return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
 int NVPTXDAGToDAGISel::getDivF32Level() const {
   if (UsePrecDivF32.getNumOccurrences() > 0) {
     // If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -89,16 +93,14 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
     const Function *F = MF->getFunction();
     // Otherwise, check for an nvptx-f32ftz attribute on the function
     if (F->hasFnAttribute("nvptx-f32ftz"))
-      return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
-                                              "nvptx-f32ftz")
-                  .getValueAsString() == "true");
+      return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
     else
       return false;
   }
 }

 bool NVPTXDAGToDAGISel::allowFMA() const {
-  const NVPTXTargetLowering *TL = Subtarget.getTargetLowering();
+  const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
   return TL->allowFMA(*MF, OptLevel);
 }

@@ -525,8 +527,7 @@ SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
   }
 }

-static unsigned int getCodeAddrSpace(MemSDNode *N,
-                                     const NVPTXSubtarget &Subtarget) {
+static unsigned int getCodeAddrSpace(MemSDNode *N) {
   const Value *Src = N->getMemOperand()->getValue();

   if (!Src)
@@ -579,20 +580,16 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
     switch (SrcAddrSpace) {
     default: report_fatal_error("Bad address space in addrspacecast");
     case ADDRESS_SPACE_GLOBAL:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
-                                : NVPTX::cvta_global_yes;
+      Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
       break;
     case ADDRESS_SPACE_SHARED:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
-                                : NVPTX::cvta_shared_yes;
+      Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
       break;
     case ADDRESS_SPACE_CONST:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
-                                : NVPTX::cvta_const_yes;
+      Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
       break;
     case ADDRESS_SPACE_LOCAL:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
-                                : NVPTX::cvta_local_yes;
+      Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
       break;
     }
     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -604,20 +601,20 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
     switch (DstAddrSpace) {
     default: report_fatal_error("Bad address space in addrspacecast");
     case ADDRESS_SPACE_GLOBAL:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
-                                : NVPTX::cvta_to_global_yes;
+      Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
+                         : NVPTX::cvta_to_global_yes;
       break;
     case ADDRESS_SPACE_SHARED:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
-                                : NVPTX::cvta_to_shared_yes;
+      Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
+                         : NVPTX::cvta_to_shared_yes;
       break;
     case ADDRESS_SPACE_CONST:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
-                                : NVPTX::cvta_to_const_yes;
+      Opc =
+          TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
       break;
     case ADDRESS_SPACE_LOCAL:
-      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
-                                : NVPTX::cvta_to_local_yes;
+      Opc =
+          TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
       break;
     }
     return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -638,7 +635,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     return nullptr;

   // Address Space Setting
-  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+  unsigned int codeAddrSpace = getCodeAddrSpace(LD);

   // Volatile Setting
   // - .volatile is only availalble for .global and .shared
@@ -713,9 +710,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
                       getI32Imm(vecType), getI32Imm(fromType),
                       getI32Imm(fromTypeWidth), Addr, Chain };
     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
-                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+  } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+                          : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
     switch (TargetVT) {
     case MVT::i8:
       Opcode = NVPTX::LD_i8_asi;
@@ -742,10 +738,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
                       getI32Imm(vecType), getI32Imm(fromType),
                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
-                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
-    if (Subtarget.is64Bit()) {
+  } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+                          : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+    if (TM.is64Bit()) {
       switch (TargetVT) {
       case MVT::i8:
         Opcode = NVPTX::LD_i8_ari_64;
@@ -797,7 +792,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
     NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   } else {
-    if (Subtarget.is64Bit()) {
+    if (TM.is64Bit()) {
       switch (TargetVT) {
       case MVT::i8:
         Opcode = NVPTX::LD_i8_areg_64;
@@ -874,7 +869,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
     return nullptr;

   // Address Space Setting
-  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);

   // Volatile Setting
   // - .volatile is only availalble for .global and .shared
@@ -974,9 +969,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
                       getI32Imm(VecType), getI32Imm(FromType),
                       getI32Imm(FromTypeWidth), Addr, Chain };
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
-                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+  } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+                          : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
     switch (N->getOpcode()) {
     default:
       return nullptr;
@@ -1028,10 +1022,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
                       getI32Imm(VecType), getI32Imm(FromType),
                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
-                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
-    if (Subtarget.is64Bit()) {
+  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+    if (TM.is64Bit()) {
       switch (N->getOpcode()) {
       default:
         return nullptr;
@@ -1133,7 +1126,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {

     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   } else {
-    if (Subtarget.is64Bit()) {
+    if (TM.is64Bit()) {
       switch (N->getOpcode()) {
       default:
         return nullptr;
@@ -1425,10 +1418,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {

     SDValue Ops[] = { Addr, Chain };
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
-                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+  } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+                          : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
-    if (Subtarget.is64Bit()) {
+    if (TM.is64Bit()) {
       switch (N->getOpcode()) {
       default:
         return nullptr;
@@ -1710,7 +1702,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {

     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   } else {
-    if (Subtarget.is64Bit()) {
+    if (TM.is64Bit()) {
       switch (N->getOpcode()) {
       default:
         return nullptr;
@@ -2013,7 +2005,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     return nullptr;

   // Address Space Setting
-  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+  unsigned int codeAddrSpace = getCodeAddrSpace(ST);

   // Volatile Setting
   // - .volatile is only availalble for .global and .shared
@@ -2083,9 +2075,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
                       getI32Imm(vecType), getI32Imm(toType),
                       getI32Imm(toTypeWidth), Addr, Chain };
     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
-                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
     switch (SourceVT) {
     case MVT::i8:
       Opcode = NVPTX::ST_i8_asi;
@@ -2112,10 +2103,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
                       getI32Imm(vecType), getI32Imm(toType),
                       getI32Imm(toTypeWidth), Base, Offset, Chain };
     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
-                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
-    if (Subtarget.is64Bit()) {
+  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+    if (TM.is64Bit()) {
       switch (SourceVT) {
       case MVT::i8:
         Opcode = NVPTX::ST_i8_ari_64;
@@ -2167,7 +2157,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
                       getI32Imm(toTypeWidth), Base, Offset, Chain };
     NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   } else {
-    if (Subtarget.is64Bit()) {
+    if (TM.is64Bit()) {
       switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_areg_64;
@@ -2241,7 +2231,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   EVT StoreVT = MemSD->getMemoryVT();

   // Address Space Setting
-  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);

   if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
     report_fatal_error("Cannot store to pointer that points to constant "
@@ -2344,9 +2334,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
       break;
     }
     StOps.push_back(Addr);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
-                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+  } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+                          : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
     switch (N->getOpcode()) {
     default:
       return nullptr;
@@ -2395,10 +2384,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
     }
     StOps.push_back(Base);
     StOps.push_back(Offset);
-  } else if (Subtarget.is64Bit()
-                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
-                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
-    if (Subtarget.is64Bit()) {
+  } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+                          : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+    if (TM.is64Bit()) {
       switch (N->getOpcode()) {
       default:
         return nullptr;
@@ -2496,7 +2484,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
     StOps.push_back(Base);
     StOps.push_back(Offset);
   } else {
-    if (Subtarget.is64Bit()) {
+    if (TM.is64Bit()) {
       switch (N->getOpcode()) {
       default:
         return nullptr;
@@ -4772,7 +4760,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
   }

   // How many bits are in our mask?
-  uint64_t NumBits = CountTrailingOnes_64(MaskVal);
+  uint64_t NumBits = countTrailingOnes(MaskVal);
   Len = CurDAG->getTargetConstant(NumBits, MVT::i32);

   if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
@@ -4836,10 +4824,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
       NumZeros = 0;
       // The number of bits in the result bitfield will be the number of
       // trailing ones (the AND) minus the number of bits we shift off
-      NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
+      NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
     } else if (isShiftedMask_64(MaskVal)) {
       NumZeros = countTrailingZeros(MaskVal);
-      unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
+      unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
       // The number of bits in the result bitfield will be the number of
       // trailing zeros plus the number of set bits in the mask minus the
       // number of bits we shift off
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 69afcd7..ca432b5 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -26,6 +26,7 @@ using namespace llvm;
 namespace {

 class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+  const NVPTXTargetMachine &TM;

   // If true, generate mul.wide from sext and mul
   bool doMulWide;
@@ -43,8 +44,8 @@ public:
   const char *getPassName() const override {
     return "NVPTX DAG->DAG Pattern Instruction Selection";
   }
-
-  const NVPTXSubtarget &Subtarget;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  const NVPTXSubtarget *Subtarget;

   bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
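One small API note from the SelectBFE hunks at the end of NVPTXISelDAGToDAG.cpp: CountTrailingOnes_64() was retired in favor of the templated llvm::countTrailingOnes(). A standalone check of the behavior the BFE code relies on (for illustration only):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    #include <cstdint>

    void maskExamples() {
      // 0x00ff has eight trailing ones: the BFE width computation.
      assert(llvm::countTrailingOnes(UINT64_C(0x00ff)) == 8);
      // A shifted mask such as 0x0ff0 is the other shape SelectBFE matches.
      assert(llvm::isShiftedMask_64(UINT64_C(0x0ff0)));
    }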
-NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) - : TargetLowering(TM), nvTM(&TM), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { +NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI) + : TargetLowering(TM), nvTM(&TM), STI(STI) { // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather @@ -167,14 +167,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); - if (nvptxSubtarget.hasROT64()) { + if (STI.hasROT64()) { setOperationAction(ISD::ROTL, MVT::i64, Legal); setOperationAction(ISD::ROTR, MVT::i64, Legal); } else { setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); } - if (nvptxSubtarget.hasROT32()) { + if (STI.hasROT32()) { setOperationAction(ISD::ROTL, MVT::i32, Legal); setOperationAction(ISD::ROTR, MVT::i32, Legal); } else { @@ -203,8 +203,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); // Turn FP extload into load/fextend - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); @@ -214,12 +215,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setTruncStoreAction(MVT::i64, MVT::i1, Expand); - setTruncStoreAction(MVT::i32, MVT::i1, Expand); - setTruncStoreAction(MVT::i16, MVT::i1, Expand); - setTruncStoreAction(MVT::i8, MVT::i1, Expand); + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } // This is legal in NVPTX setOperationAction(ISD::ConstantFP, MVT::f64, Legal); @@ -232,9 +232,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::ADDE, MVT::i64, Expand); // Register custom handling for vector loads/stores - for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT VT = (MVT::SimpleValueType) i; + for (MVT VT : MVT::vector_valuetypes()) { if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -261,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i32, Legal); setOperationAction(ISD::CTPOP, MVT::i64, Legal); + // PTX does not directly support SELP of i1, so promote to i32 first + setOperationAction(ISD::SELECT, MVT::i1, Custom); + // We have some custom DAG combine patterns for these nodes setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::AND); @@ -270,7 +271,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) // Now deduce the information based on the above mentioned // actions - computeRegisterProperties(); + 
computeRegisterProperties(STI.getRegisterInfo()); } const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -878,7 +879,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, unsigned retAlignment, const ImmutableCallSite *CS) const { - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return ""; @@ -905,16 +906,14 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { O << ".param .b" << getPointerTy().getSizeInBits() << " _"; + } else if ((retTy->getTypeID() == Type::StructTyID) || + isa<VectorType>(retTy)) { + O << ".param .align " + << retAlignment + << " .b8 _[" + << getDataLayout()->getTypeAllocSize(retTy) << "]"; } else { - if((retTy->getTypeID() == Type::StructTyID) || - isa<VectorType>(retTy)) { - O << ".param .align " - << retAlignment - << " .b8 _[" - << getDataLayout()->getTypeAllocSize(retTy) << "]"; - } else { - assert(false && "Unknown return type"); - } + llvm_unreachable("Unknown return type"); } O << ") "; } @@ -1045,7 +1044,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Type *retTy = CLI.RetTy; ImmutableCallSite *CS = CLI.CS; - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; @@ -1456,8 +1455,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); - assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( - F->getContext(), ObjectVT) == NumElts && + assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), + ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); bool needTruncate = sz < 8 ? 
true : false; @@ -1475,11 +1474,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetVTs.push_back(EltVT); LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); @@ -1505,11 +1501,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParamV2, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); @@ -1551,11 +1544,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(Ofst, MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( Opc, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); @@ -1609,11 +1599,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(Offsets[i], MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, @@ -1679,7 +1666,7 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShAmt = Op.getOperand(2); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { + if (VTBits == 32 && STI.getSmVersion() >= 35) { // For 32bit and sm35, we can use the funnel shift 'shf' instruction. // {dHi, dLo} = {aHi, aLo} >> Amt @@ -1739,7 +1726,7 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { + if (VTBits == 32 && STI.getSmVersion() >= 35) { // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
@@ -1807,11 +1794,29 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SRA_PARTS:
   case ISD::SRL_PARTS:
     return LowerShiftRightParts(Op, DAG);
+  case ISD::SELECT:
+    return LowerSelect(Op, DAG);
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }
 }

+SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Op0 = Op->getOperand(0);
+  SDValue Op1 = Op->getOperand(1);
+  SDValue Op2 = Op->getOperand(2);
+  SDLoc DL(Op.getNode());
+
+  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
+
+  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
+
+  return Trunc;
+}
+
 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   if (Op.getValueType() == MVT::i1)
     return LowerLOADi1(Op, DAG);
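Note: LowerSelect supersedes the TableGen and/or/not pattern removed from NVPTXInstrInfo.td later in this diff: i1 operands are any-extended to i32, selected with an ordinary selp, and truncated back. The two strategies in plain C++ terms:

    // Old pattern:  (p & a) | (~p & b)
    bool selectI1Old(bool p, bool a, bool b) { return (p && a) || (!p && b); }

    // New lowering: trunc(select(p, ext(a), ext(b)))
    bool selectI1New(bool p, bool a, bool b) {
      int A = a, B = b;      // ANY_EXTEND to i32
      int R = p ? A : B;     // 32-bit SELECT (selp)
      return (R & 1) != 0;   // TRUNCATE back to i1
    }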
@@ -4200,7 +4206,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: case ISD::FADD: - return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel); + return PerformADDCombine(N, DCI, STI, OptLevel); case ISD::MUL: return PerformMULCombine(N, DCI, OptLevel); case ISD::SHL: @@ -4285,11 +4291,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, } } - SmallVector<SDValue, 8> OtherOps; - // Copy regular operands - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OtherOps.push_back(N->getOperand(i)); + SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); // The select routine does not have access to the LoadSDNode instance, so // pass along the extension information @@ -4402,8 +4405,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, OtherOps.push_back(Chain); // Chain // Skip operand 1 (intrinsic ID) // Others - for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) - OtherOps.push_back(N->getOperand(i)); + OtherOps.append(N->op_begin() + 2, N->op_end()); MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); @@ -4434,9 +4436,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, "Custom handling of non-i8 ldu/ldg?"); // Just copy all operands as-is - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); + SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); // Force output to i16 SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index d66d81a..1b4da2c 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -436,7 +436,8 @@ class NVPTXSubtarget; //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM); + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -469,7 +470,8 @@ public: ConstraintType getConstraintType(const std::string &Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; SDValue LowerFormalArguments( @@ -507,8 +509,10 @@ public: bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } + bool enableAggressiveFMAFusion(EVT VT) const override { return true; } + private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &STI; // cache the subtarget here SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = MVT::i32) const; @@ -527,6 +531,8 @@ private: SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index a98fb37..aa36b6b 100644 --- 
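Note: the operand-copying loops replaced in ReplaceLoadVector and ReplaceINTRINSIC_W_CHAIN above use SmallVector's iterator-range constructor and append(). The same idiom with std::vector, for reference:

    #include <vector>

    std::vector<int> copyOperands(const std::vector<int> &N) {
      // SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
      std::vector<int> OtherOps(N.begin(), N.end());
      // OtherOps.append(N->op_begin() + 2, N->op_end());
      OtherOps.insert(OtherOps.end(), N.begin() + 2, N.end());
      return OtherOps;
    }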
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index a98fb37..aa36b6b 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -16,11 +16,11 @@
 #include "NVPTX.h"
 #include "NVPTXUtilities.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"

 using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b5b4fbe..dabc3be 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,11 +14,11 @@
 #include "NVPTX.h"
 #include "NVPTXInstrInfo.h"
 #include "NVPTXTargetMachine.h"
-#include "llvm/IR/Function.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"

 using namespace llvm;
@@ -28,9 +28,7 @@ using namespace llvm;
 // Pin the vtable to this file.
 void NVPTXInstrInfo::anchor() {}

-// FIXME: Add the subtarget support on this constructor.
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI)
-    : NVPTXGenInstrInfo(), RegInfo(STI) {}
+NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {}

 void NVPTXInstrInfo::copyPhysReg(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 6de7536..9b5d491 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -27,7 +27,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
   const NVPTXRegisterInfo RegInfo;
   virtual void anchor();
 public:
-  explicit NVPTXInstrInfo(NVPTXSubtarget &STI);
+  explicit NVPTXInstrInfo();

   const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 9900b8c..68f0d9f 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -117,24 +117,24 @@ def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
 //===----------------------------------------------------------------------===//

-def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
-def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
-def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
 def useAtomRedG32forGen32 :
-  Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
-def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
-def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
-def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
-def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+  Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
 def useAtomRedG64forGen64 :
-  Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
-def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
-def hasVote : Predicate<"Subtarget.hasVote()">;
-def hasDouble : Predicate<"Subtarget.hasDouble()">;
-def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
-def hasLDG : Predicate<"Subtarget.hasLDG()">;
-def hasLDU : Predicate<"Subtarget.hasLDU()">;
-def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+  Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget->hasVote()">;
+def hasDouble : Predicate<"Subtarget->hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
+def hasLDG : Predicate<"Subtarget->hasLDG()">;
+def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;

 def doF32FTZ : Predicate<"useF32FTZ()">;
 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
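Note: the mechanical s/Subtarget./Subtarget->/ churn in the predicates follows from the instruction selector holding the subtarget as a pointer that can be re-queried per function, rather than a reference bound at construction. A hypothetical sketch of the shape the predicate strings now assume (names illustrative, not the actual ISel class):

    struct SubtargetSketch { bool hasLDG() const { return true; } };

    class ISelSketch {
      const SubtargetSketch *Subtarget = nullptr; // refreshed per function
    public:
      void beginFunction(const SubtargetSketch &S) { Subtarget = &S; }
      bool predHasLDG() const { return Subtarget->hasLDG(); } // was Subtarget.hasLDG()
    };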
@@ -150,12 +150,12 @@ def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;

-def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
-def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
+def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
+def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;

 def true : Predicate<"1">;

-def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
+def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;

 //===----------------------------------------------------------------------===//
@@ -296,7 +296,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
 // General Type Conversion
 //-----------------------------------

-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
 // Generate a cvt to the given type from all possible types.
 // Each instance takes a CvtMode immediate that defines the conversion mode to
 // use. It can be CvtNONE to omit a conversion mode.
@@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
 defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
 defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;

-// Special select for predicate operands
-def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
-          (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
-          (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-
 //
 // Funnnel shift in clamp mode
 //
@@ -1659,12 +1654,12 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
             (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
 }

-defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
-defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
-defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
-defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
-defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
-defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
+defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
+defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
+defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
+defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
+defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
+defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;

 defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
 defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
@@ -1673,6 +1668,13 @@ defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
 defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
 defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

+defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
+defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
+defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
+defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
+defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
+defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
+
 defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
 defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
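Note: the rename makes the ordered comparisons explicit (FSetOGT and friends) and adds the "don't care" condition codes (setgt etc.), which may legally reuse the ordered PTX comparison since their NaN behavior is unspecified. Semantics in scalar C++ terms:

    #include <cmath>

    // setogt: ordered greater-than; false if either operand is NaN
    // (C++'s built-in '>' is already ordered).
    bool setogt(float a, float b) { return a > b; }

    // setugt: unordered greater-than; true if either operand is NaN.
    bool setugt(float a, float b) {
      return std::isnan(a) || std::isnan(b) || a > b;
    }
    // setgt: NaN behavior is unspecified, so lowering it like setogt is valid.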
@@ -2094,7 +2096,7 @@ multiclass LD<NVPTXRegClass regclass> {
               "$fromWidth \t$dst, [$addr+$offset];"), []>;
 }

-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
 defm LD_i8 : LD<Int16Regs>;
 defm LD_i16 : LD<Int16Regs>;
 defm LD_i32 : LD<Int32Regs>;
@@ -2136,7 +2138,7 @@ multiclass ST<NVPTXRegClass regclass> {
              " \t[$addr+$offset], $src;"), []>;
 }

-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
 defm ST_i8 : ST<Int16Regs>;
 defm ST_i16 : ST<Int16Regs>;
 defm ST_i32 : ST<Int32Regs>;
@@ -2220,7 +2222,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), []>;
 }

-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
 defm LDV_i8 : LD_VEC<Int16Regs>;
 defm LDV_i16 : LD_VEC<Int16Regs>;
 defm LDV_i32 : LD_VEC<Int32Regs>;
@@ -2303,7 +2305,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), []>;
 }

-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
 defm STV_i8 : ST_VEC<Int16Regs>;
 defm STV_i16 : ST_VEC<Int16Regs>;
 defm STV_i32 : ST_VEC<Int32Regs>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 8759406..da301d5 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -16,6 +16,7 @@
 #define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H

 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Pass.h"

@@ -29,8 +30,8 @@ struct NVPTXLowerAggrCopies : public FunctionPass {

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DataLayoutPass>();
-    AU.addPreserved("stack-protector");
     AU.addPreserved<MachineFunctionAnalysis>();
+    AU.addPreserved<StackProtector>();
   }

   bool runOnFunction(Function &F) override;
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index a1e1b9e..c1c67e3 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -48,9 +48,9 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
 char NVPTXPrologEpilogPass::ID = 0;

 bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
-  const TargetMachine &TM = MF.getTarget();
-  const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
-  const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetFrameLowering &TFI = *STI.getFrameLowering();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
   bool Modified = false;

   calculateFrameObjectOffsets(MF);
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 358ccce..5ca96e4 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -71,8 +71,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
   }
 }

-NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)
-    : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
+NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}

 #define GET_REGINFO_TARGET_DESC
 #include "NVPTXGenRegisterInfo.inc"
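Note: AU.addPreserved<StackProtector>() replaces the string-keyed AU.addPreserved("stack-protector"). The typed form resolves the pass identity at compile time, so a misspelling becomes a build error instead of a silently ignored preservation. A toy analogue of the two registration styles (names hypothetical):

    #include <set>
    #include <string>

    struct StackProtectorSketch { static char ID; };
    char StackProtectorSketch::ID = 0;

    struct AnalysisUsageSketch {
      std::set<const void *> PreservedIDs;
      std::set<std::string> PreservedNames;
      // Typed: keyed off PassT::ID, checked by the compiler.
      template <class PassT> void addPreserved() { PreservedIDs.insert(&PassT::ID); }
      // String: looked up by name at runtime; typo-prone.
      void addPreserved(const std::string &Name) { PreservedNames.insert(Name); }
    };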
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index d2e6733..75b8f15 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -22,19 +22,13 @@
 #include "NVPTXGenRegisterInfo.inc"

 namespace llvm {
-
-// Forward Declarations.
-class TargetInstrInfo;
-class NVPTXSubtarget;
-
 class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
 private:
-  bool Is64Bit;
   // Hold Strings that can be free'd all together with NVPTXRegisterInfo
   ManagedStringPool ManagedStrPool;

 public:
-  NVPTXRegisterInfo(const NVPTXSubtarget &st);
+  NVPTXRegisterInfo();

   //------------------------------------------------------
   // Pure virtual functions from TargetRegisterInfo
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 324420d..e83f735 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,11 +16,12 @@
 #include "NVPTX.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"

 using namespace llvm;
@@ -142,8 +143,9 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
   case NVPTX::LD_i64_avar: {
     // The handle is a parameter value being loaded, replace with the
     // parameter symbol
-    const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
-    if (ST.getDrvInterface() == NVPTX::CUDA) {
+    const NVPTXTargetMachine &TM =
+        static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+    if (TM.getDrvInterface() == NVPTX::CUDA) {
       // For CUDA, we preserve the param loads coming from function arguments
       return false;
     }
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 3d52532..069d6e1 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//

 #include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"

 using namespace llvm;

@@ -25,17 +26,6 @@ using namespace llvm;
 // Pin the vtable to this file.
 void NVPTXSubtarget::anchor() {}

-static std::string computeDataLayout(bool is64Bit) {
-  std::string Ret = "e";
-
-  if (!is64Bit)
-    Ret += "-p:32:32";
-
-  Ret += "-i64:64-v16:16-v32:32-n16:32:64";
-
-  return Ret;
-}
-
 NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                                 StringRef FS) {
   // Provide the default CPU if we don't have one.
@@ -54,18 +44,18 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
 }

 NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
-                               const std::string &FS, const TargetMachine &TM,
-                               bool is64Bit)
-    : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
-      SmVersion(20), DL(computeDataLayout(is64Bit)),
-      InstrInfo(initializeSubtargetDependencies(CPU, FS)),
-      TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
-      FrameLowering(*this) {
-
-  Triple T(TT);
-
-  if (T.getOS() == Triple::NVCL)
-    drvInterface = NVPTX::NVCL;
-  else
-    drvInterface = NVPTX::CUDA;
+                               const std::string &FS,
+                               const NVPTXTargetMachine &TM)
+    : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
+      InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
+      TSInfo(TM.getDataLayout()), FrameLowering() {}
+
+bool NVPTXSubtarget::hasImageHandles() const {
+  // Enable handles for Kepler+, where CUDA supports indirect surfaces and
+  // textures
+  if (TM.getDrvInterface() == NVPTX::CUDA)
+    return (SmVersion >= 30);
+
+  // Disabled, otherwise
+  return false;
 }
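Note: hasImageHandles() moves out of line because it now consults the target machine for the driver interface. Its logic, distilled to match the hunk above:

    enum DrvInterface { CUDA, NVCL };

    // Indirect surface/texture handles: CUDA on Kepler (sm_30) and newer only.
    bool hasImageHandles(DrvInterface Drv, unsigned SmVersion) {
      if (Drv == CUDA)
        return SmVersion >= 30;
      return false; // disabled otherwise
    }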
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index fb2d404..e9833e5 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -32,8 +32,6 @@ namespace llvm {
 class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   virtual void anchor();
   std::string TargetName;
-  NVPTX::DrvInterface drvInterface;
-  bool Is64Bit;

   // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
   unsigned PTXVersion;
@@ -41,7 +39,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
   unsigned int SmVersion;

-  const DataLayout DL; // Calculates type size & alignment
+  const NVPTXTargetMachine &TM;
   NVPTXInstrInfo InstrInfo;
   NVPTXTargetLowering TLInfo;
   TargetSelectionDAGInfo TSInfo;
@@ -55,13 +53,12 @@ public:
   /// of the specified module.
   ///
   NVPTXSubtarget(const std::string &TT, const std::string &CPU,
-                 const std::string &FS, const TargetMachine &TM, bool is64Bit);
+                 const std::string &FS, const NVPTXTargetMachine &TM);

   const TargetFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
   const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
-  const DataLayout *getDataLayout() const override { return &DL; }
   const NVPTXRegisterInfo *getRegisterInfo() const override {
     return &InstrInfo.getRegisterInfo();
   }
@@ -95,20 +92,9 @@ public:
   }
   inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
   inline bool hasROT64() const { return SmVersion >= 20; }
-
-  bool hasImageHandles() const {
-    // Enable handles for Kepler+, where CUDA supports indirect surfaces and
-    // textures
-    if (getDrvInterface() == NVPTX::CUDA)
-      return (SmVersion >= 30);
-
-    // Disabled, otherwise
-    return false;
-  }
-  bool is64Bit() const { return Is64Bit; }
+  bool hasImageHandles() const;

   unsigned int getSmVersion() const { return SmVersion; }
-  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
   std::string getTargetName() const { return TargetName; }

   unsigned getPTXVersion() const { return PTXVersion; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index d87693f..1a267a6 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -17,6 +17,7 @@
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXTargetObjectFile.h"
+#include "NVPTXTargetTransformInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -24,12 +25,12 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
@@ -69,14 +70,29 @@ extern "C" void LLVMInitializeNVPTXTarget() {
   initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
 }

+static std::string computeDataLayout(bool is64Bit) {
+  std::string Ret = "e";
+
+  if (!is64Bit)
+    Ret += "-p:32:32";
+
+  Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+
+  return Ret;
+}
+
 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
                                        StringRef CPU, StringRef FS,
                                        const TargetOptions &Options,
                                        Reloc::Model RM, CodeModel::Model CM,
                                        CodeGenOpt::Level OL, bool is64bit)
-    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit),
       TLOF(make_unique<NVPTXTargetObjectFile>()),
-      Subtarget(TT, CPU, FS, *this, is64bit) {
+      DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) {
+  if (Triple(TT).getOS() == Triple::NVCL)
+    drvInterface = NVPTX::NVCL;
+  else
+    drvInterface = NVPTX::CUDA;
   initAsmInfo();
 }
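Note: computeDataLayout (moved here from the subtarget) yields exactly two strings; spelling them out makes the 32/64-bit difference visible. A runnable mirror of the hunk above:

    #include <cassert>
    #include <string>

    static std::string computeDataLayoutSketch(bool is64Bit) {
      std::string Ret = "e";
      if (!is64Bit)
        Ret += "-p:32:32";
      return Ret + "-i64:64-v16:16-v32:32-n16:32:64";
    }

    int main() {
      assert(computeDataLayoutSketch(false) ==
             "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64");
      // 64-bit pointers are the DataLayout default, so no "-p" entry:
      assert(computeDataLayoutSketch(true) ==
             "e-i64:64-v16:16-v32:32-n16:32:64");
    }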
@@ -110,8 +126,7 @@ public:

   void addIRPasses() override;
   bool addInstSelector() override;
-  bool addPreRegAlloc() override;
-  bool addPostRegAlloc() override;
+  void addPostRegAlloc() override;
   void addMachineSSAOptimization() override;

   FunctionPass *createTargetRegisterAllocator(bool) override;
@@ -125,12 +140,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
   return PassConfig;
 }

-void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
-  // Add first the target-independent BasicTTI pass, then our NVPTX pass. This
-  // allows the NVPTX pass to delegate to the target independent layer when
-  // appropriate.
-  PM.add(createBasicTargetTransformInfoPass(this));
-  PM.add(createNVPTXTargetTransformInfoPass(this));
+TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
+  return TargetIRAnalysis(
+      [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
 }

 void NVPTXPassConfig::addIRPasses() {
@@ -149,6 +161,7 @@ void NVPTXPassConfig::addIRPasses() {
   addPass(createNVPTXAssignValidGlobalNamesPass());
   addPass(createGenericToNVVMPass());
   addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+  addPass(createStraightLineStrengthReducePass());
   addPass(createSeparateConstOffsetFromGEPPass());
   // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
   // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
@@ -183,10 +196,8 @@ bool NVPTXPassConfig::addInstSelector() {
   return false;
 }

-bool NVPTXPassConfig::addPreRegAlloc() { return false; }
-bool NVPTXPassConfig::addPostRegAlloc() {
-  addPass(createNVPTXPrologEpilogPass());
-  return false;
+void NVPTXPassConfig::addPostRegAlloc() {
+  addPass(createNVPTXPrologEpilogPass(), false);
 }

 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a726bd1..a81abfe 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H

-#include "NVPTXSubtarget.h"
 #include "ManagedStringPool.h"
+#include "NVPTXSubtarget.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -25,7 +25,10 @@ namespace llvm {
 /// NVPTXTargetMachine
 ///
 class NVPTXTargetMachine : public LLVMTargetMachine {
+  bool is64bit;
   std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  const DataLayout DL; // Calculates type size & alignment
+  NVPTX::DrvInterface drvInterface;
   NVPTXSubtarget Subtarget;

   // Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -37,9 +40,10 @@ public:
                      CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);

   ~NVPTXTargetMachine() override;
-
+  const DataLayout *getDataLayout() const override { return &DL; }
   const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
-
+  bool is64Bit() const { return is64bit; }
+  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
   ManagedStringPool *getManagedStrPool() const {
     return const_cast<ManagedStringPool *>(&ManagedStrPool);
   }
@@ -55,8 +59,7 @@ public:
     return TLOF.get();
   }

-  /// \brief Register NVPTX analysis passes with a pass manager.
-  void addAnalysisPasses(PassManagerBase &PM) override;
+  TargetIRAnalysis getTargetIRAnalysis() override;
 }; // NVPTXTargetMachine.
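Note: addIRPasses above now schedules straight-line strength reduction ahead of SeparateConstOffsetFromGEP. Roughly, SLSR rewrites related address computations as increments of earlier ones; a scalar illustration of the effect (illustrative only, the pass works on IR):

    // Before SLSR: two independent multiplies.
    unsigned before(unsigned b, unsigned s) { return (b + 2 * s) ^ (b + 3 * s); }

    // After SLSR (conceptually): the second value reuses the first.
    unsigned after(unsigned b, unsigned s) {
      unsigned p1 = b + 2 * s;
      unsigned p2 = p1 + s; // same as b + 3 * s, one multiply saved
      return p1 ^ p2;
    }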
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b09d0d4..b8af04d 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,19 +6,12 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// \file
-// This file implements a TargetTransformInfo analysis pass specific to the
-// NVPTX target machine. It uses the target's detailed information to provide
-// more precise answers to certain TTI queries, while letting the target
-// independent and default TTI implementations handle the rest.
-//
-//===----------------------------------------------------------------------===//

-#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetTransformInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
 #include "llvm/Target/TargetLowering.h"
@@ -26,69 +19,10 @@ using namespace llvm;

 #define DEBUG_TYPE "NVPTXtti"

-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeNVPTXTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
-  const NVPTXTargetLowering *TLI;
-public:
-  NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
-    llvm_unreachable("This pass cannot be directly constructed");
-  }
-
-  NVPTXTTI(const NVPTXTargetMachine *TM)
-      : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
-    initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
-  }
-
-  void initializePass() override { pushTTIStack(this); }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    TargetTransformInfo::getAnalysisUsage(AU);
-  }
-
-  /// Pass identification.
-  static char ID;
-
-  /// Provide necessary pointer adjustments for the two base classes.
-  void *getAdjustedAnalysisPointer(const void *ID) override {
-    if (ID == &TargetTransformInfo::ID)
-      return (TargetTransformInfo *)this;
-    return this;
-  }
-
-  bool hasBranchDivergence() const override;
-
-  unsigned getArithmeticInstrCost(
-      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
-      OperandValueKind Opd2Info = OK_AnyValue,
-      OperandValueProperties Opd1PropInfo = OP_None,
-      OperandValueProperties Opd2PropInfo = OP_None) const override;
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
-                   "NVPTX Target Transform Info", true, true, false)
-char NVPTXTTI::ID = 0;
-
-ImmutablePass *
-llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
-  return new NVPTXTTI(TM);
-}
-
-bool NVPTXTTI::hasBranchDivergence() const { return true; }
-
-unsigned NVPTXTTI::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
-    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
-    OperandValueProperties Opd2PropInfo) const {
+unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo) {
   // Legalize the type.
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

@@ -96,8 +30,8 @@ unsigned NVPTXTTI::getArithmeticInstrCost(

   switch (ISD) {
   default:
-    return TargetTransformInfo::getArithmeticInstrCost(
-        Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+                                         Opd1PropInfo, Opd2PropInfo);
   case ISD::ADD:
   case ISD::MUL:
   case ISD::XOR:
@@ -109,7 +43,7 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
     if (LT.second.SimpleTy == MVT::i64)
      return 2 * LT.first;
    // Delegate other cases to the basic TTI.
-    return TargetTransformInfo::getArithmeticInstrCost(
-        Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+                                         Opd1PropInfo, Opd2PropInfo);
   }
 }
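Note: the cost callback above charges double for 64-bit integer ALU ops, scaled by the number of pieces legalization produces (LT.first). A toy distillation of that scaling (not the full cost model, which delegates all other cases to the base implementation):

    // nvptxALUCost(true, 1)  == 2   e.g. add i64
    // nvptxALUCost(true, 2)  == 4   e.g. add <2 x i64>, split into two i64 ops
    unsigned nvptxALUCost(bool Is64BitIntALU, unsigned LegalizedParts) {
      return (Is64BitIntALU ? 2u : 1u) * LegalizedParts;
    }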
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
new file mode 100644
index 0000000..bf21e88
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -0,0 +1,74 @@
+//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file a TargetTransformInfo::Concept conforming object specific to the
+/// NVPTX target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
+  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
+  typedef TargetTransformInfo TTI;
+  friend BaseT;
+
+  const NVPTXSubtarget *ST;
+  const NVPTXTargetLowering *TLI;
+
+  const NVPTXSubtarget *getST() const { return ST; };
+  const NVPTXTargetLowering *getTLI() const { return TLI; };
+
+public:
+  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM)
+      : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+  // Provide value semantics. MSVC requires that we spell all of these out.
+  NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
+      : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+  NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
+      : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+        TLI(std::move(Arg.TLI)) {}
+  NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) {
+    BaseT::operator=(static_cast<const BaseT &>(RHS));
+    ST = RHS.ST;
+    TLI = RHS.TLI;
+    return *this;
+  }
+  NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) {
+    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+    ST = std::move(RHS.ST);
+    TLI = std::move(RHS.TLI);
+    return *this;
+  }
+
+  bool hasBranchDivergence() { return true; }
+
+  unsigned getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 5caa8bd..cf1feac 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -15,16 +15,16 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
 #include <algorithm>
 #include <cstring>
 #include <map>
 #include <string>
 #include <vector>
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Support/MutexGuard.h"

 using namespace llvm;

@@ -52,7 +52,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
   assert(prop && "Annotation property not a string");

   // value
-  ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
+  ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1));
   assert(Val && "Value operand not a constant int");

   std::string keyname = prop->getString().str();
@@ -75,7 +75,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {

   for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
     const MDNode *elem = NMD->getOperand(i);

-    Value *entity = elem->getOperand(0);
+    GlobalValue *entity =
+        mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0));
     // entity may be null due to DCE
     if (!entity)
       continue;
@@ -322,7 +323,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
   if (MDNode *alignNode = I.getMetadata("callalign")) {
     for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
       if (const ConstantInt *CI =
-              dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+              mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) {
         unsigned v = CI->getZExtValue();
         if ((v >> 16) == index) {
           align = v & 0xFFFF;
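Note: the dyn_cast-to-mdconst::dyn_extract changes track the IR-level split of Metadata from Value: constants now ride inside ConstantAsMetadata wrappers, and mdconst unwraps them. A sketch against the same LLVM API the hunks above use:

    #include <cstdint>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Pull a ConstantInt out of an MDNode operand post-split; yields null on
    // a type mismatch, mirroring dyn_cast (dyn_extract_or_null additionally
    // tolerates a null operand).
    static uint64_t intOperand(const MDNode *MD, unsigned I) {
      ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
      return CI ? CI->getZExtValue() : 0;
    }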
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
index 775df19..85aa34e 100644
--- a/lib/Target/NVPTX/NVPTXVector.td
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -661,7 +661,7 @@ class ShuffleAsmStr4<string type>
   string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
 }

-let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
+let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
 def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
                       (ins V4F32Regs:$src1, V4F32Regs:$src2,
                            i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
@@ -847,7 +847,7 @@ class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
               !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
               [], sop>;

-let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
+let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1,
     VecInstType=isVecOther.Value in {
 def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
 def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;