diff options
Diffstat (limited to 'lib/Target/NVPTX')
28 files changed, 179 insertions, 290 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 3a4a19d..cdd2f1f 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -29,7 +29,6 @@ set(NVPTXCodeGen_sources NVPTXTargetMachine.cpp NVPTXTargetTransformInfo.cpp NVPTXUtilities.cpp - NVPTXutil.cpp NVVMReflect.cpp ) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 11d737e..b9df3d1 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { InlineAsmEnd = " inline asm"; SupportsDebugInformation = CompileForDebugging; + // PTX does not allow .align on functions. + HasFunctionAlignment = false; HasDotTypeDotSizeDirective = false; Data8bitsDirective = " .b8 "; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 158ca90..2b4d864 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -71,35 +71,23 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T, // Force static initialization. extern "C" void LLVMInitializeNVPTXTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32); - RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32, - createNVPTXMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64, - createNVPTXMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32, - createNVPTXMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64, - createNVPTXMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32, - createNVPTXMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64, - createNVPTXMCSubtargetInfo); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32, - createNVPTXMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64, - createNVPTXMCInstPrinter); + for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) { + // Register the MC asm info. + RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter); + } } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h index 98821d2..bfd5123 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -14,6 +14,8 @@ #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H +#include <stdint.h> + namespace llvm { class Target; diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 1f37696..4f3ccf4 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -12,11 +12,33 @@ //===----------------------------------------------------------------------===// #include "NVPTXAllocaHoisting.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +using namespace llvm; -namespace llvm { +namespace { +// Hoisting the alloca instructions in the non-entry blocks to the entry +// block. +class NVPTXAllocaHoisting : public FunctionPass { +public: + static char ID; // Pass ID + NVPTXAllocaHoisting() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + } + + const char *getPassName() const override { + return "NVPTX specific alloca hoisting"; + } + + bool runOnFunction(Function &function) override; +}; +} // namespace bool NVPTXAllocaHoisting::runOnFunction(Function &function) { bool functionModified = false; @@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { return functionModified; } -char NVPTXAllocaHoisting::ID = 1; -static RegisterPass<NVPTXAllocaHoisting> -X("alloca-hoisting", "Hoisting alloca instructions in non-entry " - "blocks to the entry block"); +char NVPTXAllocaHoisting::ID = 0; + +namespace llvm { +void initializeNVPTXAllocaHoistingPass(PassRegistry &); +} -FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); } +INITIALIZE_PASS( + NVPTXAllocaHoisting, "alloca-hoisting", + "Hoisting alloca instructions in non-entry blocks to the entry block", + false, false) -} // end namespace llvm +FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; } diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index c343980..7a6fc7d 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -14,38 +14,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H #define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Pass.h" - namespace llvm { - class FunctionPass; -class Function; - -// Hoisting the alloca instructions in the non-entry blocks to the entry -// block. -class NVPTXAllocaHoisting : public FunctionPass { -public: - static char ID; // Pass ID - NVPTXAllocaHoisting() : FunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved<StackProtector>(); - } - - const char *getPassName() const override { - return "NVPTX specific alloca hoisting"; - } - - bool runOnFunction(Function &function) override; -}; extern FunctionPass *createAllocaHoisting(); - } // end namespace llvm #endif diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 833db04..cc58b07 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -504,8 +504,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() { void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo(); - if (TRI->isVirtualRegister(RegNo)) { + if (TargetRegisterInfo::isVirtualRegister(RegNo)) { OutStreamer.AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { @@ -522,15 +521,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, // If none of reqntid* is specified, don't output reqntid directive. unsigned reqntidx, reqntidy, reqntidz; bool specified = false; - if (llvm::getReqNTIDx(F, reqntidx) == false) + if (!llvm::getReqNTIDx(F, reqntidx)) reqntidx = 1; else specified = true; - if (llvm::getReqNTIDy(F, reqntidy) == false) + if (!llvm::getReqNTIDy(F, reqntidy)) reqntidy = 1; else specified = true; - if (llvm::getReqNTIDz(F, reqntidz) == false) + if (!llvm::getReqNTIDz(F, reqntidz)) reqntidz = 1; else specified = true; @@ -544,15 +543,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, // If none of maxntid* is specified, don't output maxntid directive. unsigned maxntidx, maxntidy, maxntidz; specified = false; - if (llvm::getMaxNTIDx(F, maxntidx) == false) + if (!llvm::getMaxNTIDx(F, maxntidx)) maxntidx = 1; else specified = true; - if (llvm::getMaxNTIDy(F, maxntidy) == false) + if (!llvm::getMaxNTIDy(F, maxntidy)) maxntidy = 1; else specified = true; - if (llvm::getMaxNTIDz(F, maxntidz) == false) + if (!llvm::getMaxNTIDz(F, maxntidz)) maxntidz = 1; else specified = true; @@ -673,7 +672,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { } for (const User *UU : U->users()) - if (usedInOneFunc(UU, oneFunc) == false) + if (!usedInOneFunc(UU, oneFunc)) return false; return true; @@ -687,7 +686,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { * 3. Is the global variable referenced only in one function? */ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { - if (gv->hasInternalLinkage() == false) + if (!gv->hasInternalLinkage()) return false; const PointerType *Pty = gv->getType(); if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) @@ -696,7 +695,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { const Function *oneFunc = nullptr; bool flag = usedInOneFunc(gv, oneFunc); - if (flag == false) + if (!flag) return false; if (!oneFunc) return false; @@ -1472,7 +1471,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } } - if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { + if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { if (Ty->isAggregateType() || Ty->isVectorTy()) { // Just print .param .align <a> .b8 .param[size]; // <a> = PAL.getparamalignment @@ -1788,7 +1787,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { + ConstantFoldConstantExpression(Cexpr, *TD))) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); @@ -1810,7 +1809,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (const ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { + ConstantFoldConstantExpression(Cexpr, *TD))) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); @@ -2085,13 +2084,6 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, } } - -// Force static initialization. -extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { - RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); - RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); -} - void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; LineReader *reader = this->getReader(filename.str()); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 7e6b5e8..9b11e70 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -92,8 +92,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { bool EmitGeneric; public: - AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - : size(_size), buffer(_size), O(_O), AP(_AP) { + AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP) + : size(size), buffer(size), O(O), AP(AP) { curpos = 0; numSymbols = 0; EmitGeneric = AP.EmitGeneric; diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index f3a095d..6d7c99c 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -123,10 +123,9 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( // => // %0 = gep X, indices // %1 = addrspacecast %0 - GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), - Indices, - GEP->getName(), - GEPI); + GetElementPtrInst *NewGEPI = GetElementPtrInst::Create( + GEP->getSourceElementType(), Cast->getOperand(0), Indices, + GEP->getName(), GEPI); NewGEPI->setIsInBounds(GEP->isInBounds()); GEP->replaceAllUsesWith( new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 86d134b..850c020 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -343,6 +343,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, // GetElementPtrConstantExpr return cast<GEPOperator>(C)->isInBounds() ? Builder.CreateGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)) : Builder.CreateInBoundsGEP( diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index e01c780..52c5e1b 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -78,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { return UsePrecSqrtF32; } else { // Otherwise, use sqrt.approx if fast math is enabled - if (TM.Options.UnsafeFPMath) - return false; - else - return true; + return !TM.Options.UnsafeFPMath; } } @@ -5044,12 +5041,12 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { + const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; - switch (ConstraintCode) { + switch (ConstraintID) { default: return true; - case 'm': // memory + case InlineAsm::Constraint_m: // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index ca432b5..6d845c9 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -48,7 +48,7 @@ public: const NVPTXSubtarget *Subtarget; bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; private: // Include the pieces autogenerated from the target description. diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 1dc81f7..ff74e6e 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -930,7 +930,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, } first = false; - if (Outs[OIdx].Flags.isByVal() == false) { + if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType() || Ty->isVectorTy()) { unsigned align = 0; const CallInst *CallI = cast<CallInst>(CS->getInstruction()); @@ -1075,7 +1075,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT VT = Outs[OIdx].VT; Type *Ty = Args[i].Ty; - if (Outs[OIdx].Flags.isByVal() == false) { + if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType()) { // aggregate SmallVector<EVT, 16> vtparts; @@ -1459,7 +1459,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); - bool needTruncate = sz < 8 ? true : false; + bool needTruncate = sz < 8; if (NumElts == 1) { // Just a simple load @@ -1577,7 +1577,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = VTs[i].getSizeInBits(); unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); - bool needTruncate = sz < 8 ? true : false; + bool needTruncate = sz < 8; if (VTs[i].isInteger() && (sz < 8)) sz = 8; @@ -1940,9 +1940,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { } // Then any remaining arguments - for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { - Ops.push_back(N->getOperand(i)); - } + Ops.append(N->op_begin() + 2, N->op_end()); SDValue NewSt = DAG.getMemIntrinsicNode( Opcode, DL, DAG.getVTList(MVT::Other), Ops, @@ -2118,7 +2116,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // to newly created nodes. The SDNodes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { if (Ty->isAggregateType()) { SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; @@ -4494,7 +4492,6 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete DwarfLocSection; delete DwarfARangesSection; delete DwarfRangesSection; - delete DwarfMacroInfoSection; } const MCSection * diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 1b4da2c..8594364 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -497,6 +497,12 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f0c3663..578401a 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -22,10 +24,33 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "nvptx" using namespace llvm; -namespace llvm { FunctionPass *createLowerAggrCopies(); } +namespace { +// actual analysis class, which is a functionpass +struct NVPTXLowerAggrCopies : public FunctionPass { + static char ID; + + NVPTXLowerAggrCopies() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + } + + bool runOnFunction(Function &F) override; + + static const unsigned MaxAggrCopySize = 128; + + const char *getPassName() const override { + return "Lower aggregate copies/intrinsics into loops"; + } +}; +} // namespace char NVPTXLowerAggrCopies::ID = 0; @@ -104,7 +129,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { SmallVector<MemTransferInst *, 4> aggrMemcpys; SmallVector<MemSetInst *, 4> aggrMemsets; - const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); // @@ -117,10 +142,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { ++II) { if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) + if (!load->hasOneUse()) continue; - if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; User *use = load->user_back(); @@ -166,7 +191,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { StoreInst *store = dyn_cast<StoreInst>(*load->user_begin()); Value *srcAddr = load->getOperand(0); Value *dstAddr = store->getOperand(1); - unsigned numLoads = DL->getTypeStoreSize(load->getType()); + unsigned numLoads = DL.getTypeStoreSize(load->getType()); Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index da301d5..3c39f53 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -15,35 +15,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H #define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Pass.h" - namespace llvm { +class FunctionPass; -// actual analysis class, which is a functionpass -struct NVPTXLowerAggrCopies : public FunctionPass { - static char ID; - - NVPTXLowerAggrCopies() : FunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved<StackProtector>(); - } - - bool runOnFunction(Function &F) override; - - static const unsigned MaxAggrCopySize = 128; - - const char *getPassName() const override { - return "Lower aggregate copies/intrinsics into loops"; - } -}; - -extern FunctionPass *createLowerAggrCopies(); +FunctionPass *createLowerAggrCopies(); } #endif diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp index 3149399..68dfbb7 100644 --- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp @@ -35,7 +35,8 @@ namespace llvm { void initializeNVPTXLowerStructArgsPass(PassRegistry &); } -class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass { +namespace { +class NVPTXLowerStructArgs : public FunctionPass { bool runOnFunction(Function &F) override; void handleStructPtrArgs(Function &); @@ -48,6 +49,7 @@ public: return "Copy structure (byval *) arguments to stack"; } }; +} // namespace char NVPTXLowerStructArgs::ID = 1; diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index d39a394..f075b8b 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -29,8 +29,8 @@ private: const VariantKind Kind; const APFloat Flt; - explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt) - : Kind(_Kind), Flt(_Flt) {} + explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt) + : Kind(Kind), Flt(Flt) {} public: /// @name Construction diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 5ca96e4..6e97f9ef 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -78,7 +78,7 @@ NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {} /// NVPTX Callee Saved Registers const MCPhysReg * -NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { static const MCPhysReg CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 75b8f15..c310a9c 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -35,8 +35,7 @@ public: //------------------------------------------------------ // NVPTX callee saved registers - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index f1d3cb4..0d2627d 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -26,7 +26,7 @@ namespace llvm { class NVPTXSection : public MCSection { virtual void anchor(); public: - NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {} + NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} virtual ~NVPTXSection() {} /// Override this as NVPTX has its own way of printing switching @@ -36,11 +36,8 @@ public: const MCExpr *Subsection) const override {} /// Base address of PTX sections is zero. - bool isBaseAddressKnownZero() const override { return true; } bool UseCodeAlign() const override { return false; } bool isVirtualSection() const override { return false; } - std::string getLabelBeginName() const override { return ""; } - std::string getLabelEndName() const override { return ""; } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 1a267a6..1b6bc71 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -50,6 +50,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); +void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); void initializeNVPTXLowerStructArgsPass(PassRegistry &); @@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { // but it's very NVPTX-specific. initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); + initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); @@ -86,9 +88,10 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit), - TLOF(make_unique<NVPTXTargetObjectFile>()), - DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) { + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, + CM, OL), + is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), + Subtarget(TT, CPU, FS, *this) { if (Triple(TT).getOS() == Triple::NVCL) drvInterface = NVPTX::NVCL; else @@ -183,8 +186,7 @@ void NVPTXPassConfig::addIRPasses() { } bool NVPTXPassConfig::addInstSelector() { - const NVPTXSubtarget &ST = - getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); + const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); addPass(createLowerAggrCopies()); addPass(createAllocaHoisting()); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index a81abfe..b8df5af 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -27,7 +27,6 @@ namespace llvm { class NVPTXTargetMachine : public LLVMTargetMachine { bool is64bit; std::unique_ptr<TargetLoweringObjectFile> TLOF; - const DataLayout DL; // Calculates type size & alignment NVPTX::DrvInterface drvInterface; NVPTXSubtarget Subtarget; @@ -40,8 +39,10 @@ public: CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); ~NVPTXTargetMachine() override; - const DataLayout *getDataLayout() const override { return &DL; } - const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const NVPTXSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } bool is64Bit() const { return is64bit; } NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } ManagedStringPool *getManagedStrPool() const { diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 00ceca5..5d9ab0d 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -41,7 +41,6 @@ public: DwarfLocSection = nullptr; DwarfARangesSection = nullptr; DwarfRangesSection = nullptr; - DwarfMacroInfoSection = nullptr; } virtual ~NVPTXTargetObjectFile(); @@ -83,8 +82,6 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); - DwarfMacroInfoSection = - new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } const MCSection *getSectionForConstant(SectionKind Kind, diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index cf1feac..1f178af 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; bool retval = llvm::findOneNVVMAnnotation( &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); - if (retval == false) { + if (!retval) { // There is no NVVM metadata, check the calling convention - if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) - return true; - else - return false; + return F.getCallingConv() == llvm::CallingConv::PTX_Kernel; } return (x == 1); } @@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; bool retval = llvm::findAllNVVMAnnotation( &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); - if (retval == false) + if (!retval) return false; for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp deleted file mode 100644 index 5f074b3..0000000 --- a/lib/Target/NVPTX/NVPTXutil.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions that can be used in CodeGen. -// -//===----------------------------------------------------------------------===// - -#include "NVPTXutil.h" -#include "NVPTX.h" - -using namespace llvm; - -namespace llvm { - -bool isParamLoad(const MachineInstr *MI) { - if ((MI->getOpcode() != NVPTX::LD_i32_avar) && - (MI->getOpcode() != NVPTX::LD_i64_avar)) - return false; - if (MI->getOperand(2).isImm() == false) - return false; - if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM) - return false; - return true; -} - -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 - -static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { - char *a; - char *end = space + splen; - - a = space; - do { - unsigned char uc; - - if (a >= end) - return 1; - uc = val & DATA_MASK; - val >>= DIGIT_WIDTH; - if (val != 0) - uc |= MORE_BYTES; - *a = uc; - a++; - } while (val); - *nbytes = a - space; - return 0; -} - -#undef DATA_MASK -#undef DIGIT_WIDTH -#undef MORE_BYTES - -uint64_t encode_leb128(const char *str) { - union { - uint64_t x; - char a[8]; - } temp64; - - temp64.x = 0; - - for (unsigned i = 0, e = strlen(str); i != e; ++i) - temp64.a[i] = str[e - 1 - i]; - - char encoded[16]; - int nbytes; - - int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - - (void) retval; - assert(retval == 0 && "Encoding to leb128 failed"); - - assert(nbytes <= 8 && - "Cannot support register names with leb128 encoding > 8 bytes"); - - temp64.x = 0; - for (int i = 0; i < nbytes; ++i) - temp64.a[i] = encoded[i]; - - return temp64.x; -} - -} // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h deleted file mode 100644 index 1915dac..0000000 --- a/lib/Target/NVPTX/NVPTXutil.h +++ /dev/null @@ -1,25 +0,0 @@ -//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions that can be used in CodeGen. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H -#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H - -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" - -namespace llvm { -bool isParamLoad(const MachineInstr *); -uint64_t encode_leb128(const char *str); -} - -#endif diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index a8d6b95..5e375b7 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_os_ostream.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include <map> #include <sstream> @@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) { // ConstantArray can be found successfully, see if it can be // found in VarMap. If so, replace the uses of CallInst with the // value found in VarMap. If not, replace the use with value 0. + + // IR for __nvvm_reflect calls differs between CUDA versions: + // CUDA 6.5 and earlier uses this sequence: + // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8 + // (i8 addrspace(4)* getelementptr inbounds + // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0)) + // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + // + // Value returned by Sym->getOperand(0) is a Constant with a + // ConstantDataSequential operand which can be converted to string and used + // for lookup. + // + // CUDA 7.0 does it slightly differently: + // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast + // (i8 addrspace(1)* getelementptr inbounds + // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*)) + // + // In this case, we get a Constant with a GlobalVariable operand and we need + // to dig deeper to find its initializer with the string we'll use for lookup. + for (User *U : ReflectFunction->users()) { assert(isa<CallInst>(U) && "Only a call instruction can use _reflect"); CallInst *Reflect = cast<CallInst>(U); @@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) { const Value *Sym = GEP->getOperand(0); assert(isa<Constant>(Sym) && "Format of _reflect function not recognized"); - const Constant *SymStr = cast<Constant>(Sym); + const Value *Operand = cast<Constant>(Sym)->getOperand(0); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) { + // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's + // initializer. + assert(GV->hasInitializer() && + "Format of _reflect function not recognized"); + const Constant *Initializer = GV->getInitializer(); + Operand = Initializer; + } - assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) && + assert(isa<ConstantDataSequential>(Operand) && "Format of _reflect function not recognized"); - - assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() && + assert(cast<ConstantDataSequential>(Operand)->isCString() && "Format of _reflect function not recognized"); std::string ReflectArg = - cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString(); + cast<ConstantDataSequential>(Operand)->getAsString(); ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1); DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n"); |