diff options
| author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 | 
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 | 
| commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch) | |
| tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/NVPTX | |
| parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff) | |
| download | external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2 | |
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/NVPTX')
38 files changed, 4556 insertions, 405 deletions
| diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 029118a..4e35b18 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -9,6 +9,7 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)  add_public_tablegen_target(NVPTXCommonTableGen)  set(NVPTXCodeGen_sources +  NVPTXFavorNonGenericAddrSpaces.cpp    NVPTXFrameLowering.cpp    NVPTXInstrInfo.cpp    NVPTXISelDAGToDAG.cpp @@ -26,6 +27,8 @@ set(NVPTXCodeGen_sources    NVPTXAssignValidGlobalNames.cpp    NVPTXPrologEpilogPass.cpp    NVPTXMCExpr.cpp +  NVPTXReplaceImageHandles.cpp +  NVPTXImageOptimizer.cpp    )  add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index cf165be..9618896 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -11,7 +11,6 @@  //  //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer"  #include "InstPrinter/NVPTXInstPrinter.h"  #include "MCTargetDesc/NVPTXBaseInfo.h"  #include "NVPTX.h" @@ -25,6 +24,8 @@  #include <cctype>  using namespace llvm; +#define DEBUG_TYPE "asm-printer" +  #include "NVPTXGenAsmWriter.inc" diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 93029ae..1fb3c57 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -27,8 +27,8 @@ public:    NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,                     const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); -  virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; -  virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); +  void printRegName(raw_ostream &OS, unsigned RegNo) const override; +  void printInst(const MCInst *MI, 
raw_ostream &OS, StringRef Annot) override;    // Autogenerated by tblgen.    void printInstruction(const MCInst *MI, raw_ostream &O); @@ -37,15 +37,15 @@ public:    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);    void printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O, -                    const char *Modifier = 0); +                    const char *Modifier = nullptr);    void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O, -                    const char *Modifier = 0); +                    const char *Modifier = nullptr);    void printLdStCode(const MCInst *MI, int OpNum, -                     raw_ostream &O, const char *Modifier = 0); +                     raw_ostream &O, const char *Modifier = nullptr);    void printMemOperand(const MCInst *MI, int OpNum, -                       raw_ostream &O, const char *Modifier = 0); +                       raw_ostream &O, const char *Modifier = nullptr);    void printProtoIdent(const MCInst *MI, int OpNum, -                       raw_ostream &O, const char *Modifier = 0); +                       raw_ostream &O, const char *Modifier = nullptr);  };  } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index edf4a80..ddb122f 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -43,14 +43,16 @@ enum PropertyAnnotation {    PROPERTY_ISSAMPLER,    PROPERTY_ISREADONLY_IMAGE_PARAM,    PROPERTY_ISWRITEONLY_IMAGE_PARAM, +  PROPERTY_ISREADWRITE_IMAGE_PARAM,    PROPERTY_ISKERNEL_FUNCTION,    PROPERTY_ALIGN, +  PROPERTY_MANAGED,    // last property    PROPERTY_LAST  }; -const unsigned AnnotationNameLen = 8; // length of each annotation name +const unsigned AnnotationNameLen = 9; // length of each annotation name  const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {    "maxntidx",                         // PROPERTY_MAXNTID_X    "maxntidy",               
          // PROPERTY_MAXNTID_Y @@ -64,8 +66,10 @@ const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {    "sampler",                          // PROPERTY_ISSAMPLER    "rdoimage",                         // PROPERTY_ISREADONLY_IMAGE_PARAM    "wroimage",                         // PROPERTY_ISWRITEONLY_IMAGE_PARAM +  "rdwrimage",                        // PROPERTY_ISREADWRITE_IMAGE_PARAM    "kernel",                           // PROPERTY_ISKERNEL_FUNCTION    "align",                            // PROPERTY_ALIGN +  "managed",                          // PROPERTY_MANAGED                // last property    "proplast", // PROPERTY_LAST diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 3cf6e4b..158ca90 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -20,6 +20,8 @@  #include "llvm/MC/MCSubtargetInfo.h"  #include "llvm/Support/TargetRegistry.h" +using namespace llvm; +  #define GET_INSTRINFO_MC_DESC  #include "NVPTXGenInstrInfo.inc" @@ -29,8 +31,6 @@  #define GET_REGINFO_MC_DESC  #include "NVPTXGenRegisterInfo.inc" -using namespace llvm; -  static MCInstrInfo *createNVPTXMCInstrInfo() {    MCInstrInfo *X = new MCInstrInfo();    InitNVPTXMCInstrInfo(X); @@ -66,7 +66,7 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,                                                 const MCSubtargetInfo &STI) {    if (SyntaxVariant == 0)      return new NVPTXInstPrinter(MAI, MII, MRI, STI); -  return 0; +  return nullptr;  }  // Force static initialization. 
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 8cbdd47..e74c808 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -63,9 +63,12 @@ FunctionPass *  createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);  ModulePass *createNVPTXAssignValidGlobalNamesPass();  ModulePass *createGenericToNVVMPass(); +FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();  ModulePass *createNVVMReflectPass();  ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);  MachineFunctionPass *createNVPTXPrologEpilogPass(); +MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); +FunctionPass *createNVPTXImageOptimizerPass();  bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 22404b7..5b61068 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -30,17 +30,17 @@ public:    static char ID; // Pass ID    NVPTXAllocaHoisting() : FunctionPass(ID) {} -  void getAnalysisUsage(AnalysisUsage &AU) const { +  void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<DataLayoutPass>();      AU.addPreserved("stack-protector");      AU.addPreserved<MachineFunctionAnalysis>();    } -  virtual const char *getPassName() const { +  const char *getPassName() const override {      return "NVPTX specific alloca hoisting";    } -  virtual bool runOnFunction(Function &function); +  bool runOnFunction(Function &function) override;  };  extern FunctionPass *createAllocaHoisting(); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 97e2cc6..4ec575f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -17,6 +17,7 @@  #include "MCTargetDesc/NVPTXMCAsmInfo.h"  #include "NVPTX.h"  #include "NVPTXInstrInfo.h" +#include "NVPTXMachineFunctionInfo.h"  #include "NVPTXMCExpr.h"  #include 
"NVPTXRegisterInfo.h"  #include "NVPTXTargetMachine.h" @@ -131,7 +132,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {      return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);    const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); -  if (CE == 0) +  if (!CE)      llvm_unreachable("Unknown constant value to lower!");    switch (CE->getOpcode()) { @@ -149,9 +150,24 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {        raw_string_ostream OS(S);        OS << "Unsupported expression in static initializer: ";        CE->printAsOperand(OS, /*PrintType=*/ false, -                     !AP.MF ? 0 : AP.MF->getFunction()->getParent()); +                         !AP.MF ? nullptr : AP.MF->getFunction()->getParent());        report_fatal_error(OS.str());      } +  case Instruction::AddrSpaceCast: { +    // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be +    // handled by the generic() logic in the MCExpr printer +    PointerType *DstTy            = cast<PointerType>(CE->getType()); +    PointerType *SrcTy            = cast<PointerType>(CE->getOperand(0)->getType()); +    if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) { +      return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP); +    } +    std::string S; +    raw_string_ostream OS(S); +    OS << "Unsupported expression in static initializer: "; +    CE->printAsOperand(OS, /*PrintType=*/ false, +                       !AP.MF ? 
nullptr : AP.MF->getFunction()->getParent()); +    report_fatal_error(OS.str()); +  }    case Instruction::GetElementPtr: {      const DataLayout &TD = *AP.TM.getDataLayout();      // Generate a symbolic expression for the byte address @@ -310,13 +326,279 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {    EmitToStreamer(OutStreamer, Inst);  } +// Handle symbol backtracking for targets that do not support image handles +bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, +                                           unsigned OpNo, MCOperand &MCOp) { +  const MachineOperand &MO = MI->getOperand(OpNo); + +  switch (MI->getOpcode()) { +  default: return false; +  case NVPTX::TEX_1D_F32_I32: +  case NVPTX::TEX_1D_F32_F32: +  case NVPTX::TEX_1D_F32_F32_LEVEL: +  case NVPTX::TEX_1D_F32_F32_GRAD: +  case NVPTX::TEX_1D_I32_I32: +  case NVPTX::TEX_1D_I32_F32: +  case NVPTX::TEX_1D_I32_F32_LEVEL: +  case NVPTX::TEX_1D_I32_F32_GRAD: +  case NVPTX::TEX_1D_ARRAY_F32_I32: +  case NVPTX::TEX_1D_ARRAY_F32_F32: +  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: +  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: +  case NVPTX::TEX_1D_ARRAY_I32_I32: +  case NVPTX::TEX_1D_ARRAY_I32_F32: +  case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: +  case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: +  case NVPTX::TEX_2D_F32_I32: +  case NVPTX::TEX_2D_F32_F32: +  case NVPTX::TEX_2D_F32_F32_LEVEL: +  case NVPTX::TEX_2D_F32_F32_GRAD: +  case NVPTX::TEX_2D_I32_I32: +  case NVPTX::TEX_2D_I32_F32: +  case NVPTX::TEX_2D_I32_F32_LEVEL: +  case NVPTX::TEX_2D_I32_F32_GRAD: +  case NVPTX::TEX_2D_ARRAY_F32_I32: +  case NVPTX::TEX_2D_ARRAY_F32_F32: +  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: +  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: +  case NVPTX::TEX_2D_ARRAY_I32_I32: +  case NVPTX::TEX_2D_ARRAY_I32_F32: +  case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: +  case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: +  case NVPTX::TEX_3D_F32_I32: +  case NVPTX::TEX_3D_F32_F32: +  case NVPTX::TEX_3D_F32_F32_LEVEL: +  case 
NVPTX::TEX_3D_F32_F32_GRAD: +  case NVPTX::TEX_3D_I32_I32: +  case NVPTX::TEX_3D_I32_F32: +  case NVPTX::TEX_3D_I32_F32_LEVEL: +  case NVPTX::TEX_3D_I32_F32_GRAD: +   { +    // This is a texture fetch, so operand 4 is a texref and operand 5 is +    // a samplerref +    if (OpNo == 4) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } +    if (OpNo == 5) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } + +    return false; +  } +  case NVPTX::SULD_1D_I8_TRAP: +  case NVPTX::SULD_1D_I16_TRAP: +  case NVPTX::SULD_1D_I32_TRAP: +  case NVPTX::SULD_1D_ARRAY_I8_TRAP: +  case NVPTX::SULD_1D_ARRAY_I16_TRAP: +  case NVPTX::SULD_1D_ARRAY_I32_TRAP: +  case NVPTX::SULD_2D_I8_TRAP: +  case NVPTX::SULD_2D_I16_TRAP: +  case NVPTX::SULD_2D_I32_TRAP: +  case NVPTX::SULD_2D_ARRAY_I8_TRAP: +  case NVPTX::SULD_2D_ARRAY_I16_TRAP: +  case NVPTX::SULD_2D_ARRAY_I32_TRAP: +  case NVPTX::SULD_3D_I8_TRAP: +  case NVPTX::SULD_3D_I16_TRAP: +  case NVPTX::SULD_3D_I32_TRAP: { +    // This is a V1 surface load, so operand 1 is a surfref +    if (OpNo == 1) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } + +    return false; +  } +  case NVPTX::SULD_1D_V2I8_TRAP: +  case NVPTX::SULD_1D_V2I16_TRAP: +  case NVPTX::SULD_1D_V2I32_TRAP: +  case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: +  case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: +  case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: +  case NVPTX::SULD_2D_V2I8_TRAP: +  case NVPTX::SULD_2D_V2I16_TRAP: +  case NVPTX::SULD_2D_V2I32_TRAP: +  case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: +  case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: +  case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: +  case NVPTX::SULD_3D_V2I8_TRAP: +  case NVPTX::SULD_3D_V2I16_TRAP: +  case NVPTX::SULD_3D_V2I32_TRAP: { +    // This is a V2 surface load, so operand 2 is a surfref +    if (OpNo == 2) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } + +    return false; +  } +  case NVPTX::SULD_1D_V4I8_TRAP: +  case 
NVPTX::SULD_1D_V4I16_TRAP: +  case NVPTX::SULD_1D_V4I32_TRAP: +  case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: +  case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: +  case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: +  case NVPTX::SULD_2D_V4I8_TRAP: +  case NVPTX::SULD_2D_V4I16_TRAP: +  case NVPTX::SULD_2D_V4I32_TRAP: +  case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: +  case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: +  case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: +  case NVPTX::SULD_3D_V4I8_TRAP: +  case NVPTX::SULD_3D_V4I16_TRAP: +  case NVPTX::SULD_3D_V4I32_TRAP: { +    // This is a V4 surface load, so operand 4 is a surfref +    if (OpNo == 4) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } + +    return false; +  } +  case NVPTX::SUST_B_1D_B8_TRAP: +  case NVPTX::SUST_B_1D_B16_TRAP: +  case NVPTX::SUST_B_1D_B32_TRAP: +  case NVPTX::SUST_B_1D_V2B8_TRAP: +  case NVPTX::SUST_B_1D_V2B16_TRAP: +  case NVPTX::SUST_B_1D_V2B32_TRAP: +  case NVPTX::SUST_B_1D_V4B8_TRAP: +  case NVPTX::SUST_B_1D_V4B16_TRAP: +  case NVPTX::SUST_B_1D_V4B32_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_B_2D_B8_TRAP: +  case NVPTX::SUST_B_2D_B16_TRAP: +  case NVPTX::SUST_B_2D_B32_TRAP: +  case NVPTX::SUST_B_2D_V2B8_TRAP: +  case NVPTX::SUST_B_2D_V2B16_TRAP: +  case NVPTX::SUST_B_2D_V2B32_TRAP: +  case NVPTX::SUST_B_2D_V4B8_TRAP: +  case NVPTX::SUST_B_2D_V4B16_TRAP: +  case NVPTX::SUST_B_2D_V4B32_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: +  case 
NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_B_3D_B8_TRAP: +  case NVPTX::SUST_B_3D_B16_TRAP: +  case NVPTX::SUST_B_3D_B32_TRAP: +  case NVPTX::SUST_B_3D_V2B8_TRAP: +  case NVPTX::SUST_B_3D_V2B16_TRAP: +  case NVPTX::SUST_B_3D_V2B32_TRAP: +  case NVPTX::SUST_B_3D_V4B8_TRAP: +  case NVPTX::SUST_B_3D_V4B16_TRAP: +  case NVPTX::SUST_B_3D_V4B32_TRAP: +  case NVPTX::SUST_P_1D_B8_TRAP: +  case NVPTX::SUST_P_1D_B16_TRAP: +  case NVPTX::SUST_P_1D_B32_TRAP: +  case NVPTX::SUST_P_1D_V2B8_TRAP: +  case NVPTX::SUST_P_1D_V2B16_TRAP: +  case NVPTX::SUST_P_1D_V2B32_TRAP: +  case NVPTX::SUST_P_1D_V4B8_TRAP: +  case NVPTX::SUST_P_1D_V4B16_TRAP: +  case NVPTX::SUST_P_1D_V4B32_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_P_2D_B8_TRAP: +  case NVPTX::SUST_P_2D_B16_TRAP: +  case NVPTX::SUST_P_2D_B32_TRAP: +  case NVPTX::SUST_P_2D_V2B8_TRAP: +  case NVPTX::SUST_P_2D_V2B16_TRAP: +  case NVPTX::SUST_P_2D_V2B32_TRAP: +  case NVPTX::SUST_P_2D_V4B8_TRAP: +  case NVPTX::SUST_P_2D_V4B16_TRAP: +  case NVPTX::SUST_P_2D_V4B32_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_P_3D_B8_TRAP: +  case NVPTX::SUST_P_3D_B16_TRAP: +  case NVPTX::SUST_P_3D_B32_TRAP: +  case NVPTX::SUST_P_3D_V2B8_TRAP: +  case 
NVPTX::SUST_P_3D_V2B16_TRAP: +  case NVPTX::SUST_P_3D_V2B32_TRAP: +  case NVPTX::SUST_P_3D_V4B8_TRAP: +  case NVPTX::SUST_P_3D_V4B16_TRAP: +  case NVPTX::SUST_P_3D_V4B32_TRAP: { +    // This is a surface store, so operand 0 is a surfref +    if (OpNo == 0) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } + +    return false; +  } +  case NVPTX::TXQ_CHANNEL_ORDER: +  case NVPTX::TXQ_CHANNEL_DATA_TYPE: +  case NVPTX::TXQ_WIDTH: +  case NVPTX::TXQ_HEIGHT: +  case NVPTX::TXQ_DEPTH: +  case NVPTX::TXQ_ARRAY_SIZE: +  case NVPTX::TXQ_NUM_SAMPLES: +  case NVPTX::TXQ_NUM_MIPMAP_LEVELS: +  case NVPTX::SUQ_CHANNEL_ORDER: +  case NVPTX::SUQ_CHANNEL_DATA_TYPE: +  case NVPTX::SUQ_WIDTH: +  case NVPTX::SUQ_HEIGHT: +  case NVPTX::SUQ_DEPTH: +  case NVPTX::SUQ_ARRAY_SIZE: { +    // This is a query, so operand 1 is a surfref/texref +    if (OpNo == 1) { +      lowerImageHandleSymbol(MO.getImm(), MCOp); +      return true; +    } + +    return false; +  } +  } +} + +void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { +  // Ewwww +  TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); +  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); +  const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); +  const char *Sym = MFI->getImageHandleSymbol(Index); +  std::string *SymNamePtr = +    nvTM.getManagedStrPool()->getManagedString(Sym); +  MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol( +    StringRef(SymNamePtr->c_str()))); +} +  void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {    OutMI.setOpcode(MI->getOpcode()); +  const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();    // Special: Do not mangle symbol operand of CALL_PROTOTYPE    if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {      const MachineOperand &MO = MI->getOperand(0); -    OutMI.addOperand(GetSymbolRef(MO, +    OutMI.addOperand(GetSymbolRef(        
OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));      return;    } @@ -325,6 +607,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {      const MachineOperand &MO = MI->getOperand(i);      MCOperand MCOp; +    if (!ST.hasImageHandles()) { +      if (lowerImageHandleOperand(MI, i, MCOp)) { +        OutMI.addOperand(MCOp); +        continue; +      } +    } +      if (lowerOperand(MO, MCOp))        OutMI.addOperand(MCOp);    } @@ -345,10 +634,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,          MO.getMBB()->getSymbol(), OutContext));      break;    case MachineOperand::MO_ExternalSymbol: -    MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); +    MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));      break;    case MachineOperand::MO_GlobalAddress: -    MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); +    MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));      break;    case MachineOperand::MO_FPImmediate: {      const ConstantFP *Cnt = MO.getFPImm(); @@ -407,8 +696,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {    }  } -MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, -                                        const MCSymbol *Symbol) { +MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {    const MCExpr *Expr;    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,                                   OutContext); @@ -750,7 +1038,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {    if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)      return false; -  const Function *oneFunc = 0; +  const Function *oneFunc = nullptr;    bool flag = usedInOneFunc(gv, oneFunc);    if (flag == false) @@ -1010,6 +1298,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {    for (i = 0; i < n; i++)      global_list.insert(global_list.end(), gv_array[i]); +  clearAnnotationCache(&M); +  
  delete[] gv_array;    return ret; @@ -1105,10 +1395,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,    if (llvm::isSampler(*GVar)) {      O << ".global .samplerref " << llvm::getSamplerName(*GVar); -    const Constant *Initializer = NULL; +    const Constant *Initializer = nullptr;      if (GVar->hasInitializer())        Initializer = GVar->getInitializer(); -    const ConstantInt *CI = NULL; +    const ConstantInt *CI = nullptr;      if (Initializer)        CI = dyn_cast<ConstantInt>(Initializer);      if (CI) { @@ -1175,7 +1465,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,        return;    } -  const Function *demotedFunc = 0; +  const Function *demotedFunc = nullptr;    if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {      O << "// " << GVar->getName().str() << " has been demoted\n";      if (localDecls.find(demotedFunc) != localDecls.end()) @@ -1347,7 +1637,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {        return "u32";    }    llvm_unreachable("unexpected type"); -  return NULL; +  return nullptr;  }  void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, @@ -1495,19 +1785,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {      first = false;      // Handle image/sampler parameters -    if (llvm::isSampler(*I) || llvm::isImage(*I)) { -      if (llvm::isImage(*I)) { -        std::string sname = I->getName(); -        if (llvm::isImageWriteOnly(*I)) -          O << "\t.param .surfref " << *getSymbol(F) << "_param_" -            << paramIndex; -        else // Default image is read_only -          O << "\t.param .texref " << *getSymbol(F) << "_param_" -            << paramIndex; -      } else // Should be llvm::isSampler(*I) -        O << "\t.param .samplerref " << *getSymbol(F) << "_param_" -          << paramIndex; -      continue; +    if (isKernelFunction(*F)) { +      if (isSampler(*I) || 
isImage(*I)) { +        if (isImage(*I)) { +          std::string sname = I->getName(); +          if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { +            if (nvptxSubtarget.hasImageHandles()) +              O << "\t.param .u64 .ptr .surfref "; +            else +              O << "\t.param .surfref "; +            O << *CurrentFnSym << "_param_" << paramIndex; +          } +          else { // Default image is read_only +            if (nvptxSubtarget.hasImageHandles()) +              O << "\t.param .u64 .ptr .texref "; +            else +              O << "\t.param .texref "; +            O << *CurrentFnSym << "_param_" << paramIndex; +          } +        } else { +          if (nvptxSubtarget.hasImageHandles()) +            O << "\t.param .u64 .ptr .samplerref "; +          else +            O << "\t.param .samplerref "; +          O << *CurrentFnSym << "_param_" << paramIndex; +        } +        continue; +      }      }      if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { @@ -1752,13 +2056,35 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {      return;    }    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { -    O << *getSymbol(GVar); +    PointerType *PTy = dyn_cast<PointerType>(GVar->getType()); +    bool IsNonGenericPointer = false; +    if (PTy && PTy->getAddressSpace() != 0) { +      IsNonGenericPointer = true; +    } +    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { +      O << "generic("; +      O << *getSymbol(GVar); +      O << ")"; +    } else { +      O << *getSymbol(GVar); +    }      return;    }    if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {      const Value *v = Cexpr->stripPointerCasts(); +    PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); +    bool IsNonGenericPointer = false; +    if (PTy && PTy->getAddressSpace() != 0) { +      IsNonGenericPointer = true; +    }      if (const GlobalValue *GVar = 
dyn_cast<GlobalValue>(v)) { -      O << *getSymbol(GVar); +      if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { +        O << "generic("; +        O << *getSymbol(GVar); +        O << ")"; +      } else { +        O << *getSymbol(GVar); +      }        return;      } else {        O << *LowerConstant(CPV, *this); @@ -2121,7 +2447,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {  }  LineReader *NVPTXAsmPrinter::getReader(std::string filename) { -  if (reader == NULL) { +  if (!reader) {      reader = new LineReader(filename);    } diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 7162420..a9f9bdd 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -96,6 +96,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {      unsigned curpos;      raw_ostream &O;      NVPTXAsmPrinter &AP; +    bool EmitGeneric;    public:      AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) @@ -104,6 +105,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {        size = _size;        curpos = 0;        numSymbols = 0; +      EmitGeneric = AP.EmitGeneric;      }      ~AggBuffer() { delete[] buffer; }      unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { @@ -155,7 +157,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {              const Value *v = Symbols[nSym];              if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {                MCSymbol *Name = AP.getSymbol(GVar); -              O << *Name; +              PointerType *PTy = dyn_cast<PointerType>(GVar->getType()); +              bool IsNonGenericPointer = false; +              if (PTy && PTy->getAddressSpace() != 0) { +                IsNonGenericPointer = true; +              } +              if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { +                O << "generic("; +                O << 
*Name; +                O << ")"; +              } else { +                O << *Name; +              }              } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {                O << *nvptx::LowerConstant(Cexpr, AP);              } else @@ -176,31 +189,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {    friend class AggBuffer; -  virtual void emitSrcInText(StringRef filename, unsigned line); +  void emitSrcInText(StringRef filename, unsigned line);  private: -  virtual const char *getPassName() const { return "NVPTX Assembly Printer"; } +  const char *getPassName() const override { return "NVPTX Assembly Printer"; }    const Function *F;    std::string CurrentFnName; -  void EmitFunctionEntryLabel(); -  void EmitFunctionBodyStart(); -  void EmitFunctionBodyEnd(); -  void emitImplicitDef(const MachineInstr *MI) const; +  void EmitFunctionEntryLabel() override; +  void EmitFunctionBodyStart() override; +  void EmitFunctionBodyEnd() override; +  void emitImplicitDef(const MachineInstr *MI) const override; -  void EmitInstruction(const MachineInstr *); +  void EmitInstruction(const MachineInstr *) override;    void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);    bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); -  MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); +  MCOperand GetSymbolRef(const MCSymbol *Symbol);    unsigned encodeVirtualRegister(unsigned Reg); -  void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {} +  void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {}    void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,                                   raw_ostream &O);    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, -                       const char *Modifier = 0); +                       const char *Modifier = nullptr);    void printImplicitDef(const MachineInstr *MI, 
raw_ostream &O) const;    void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,                            bool = false); @@ -221,15 +234,15 @@ private:    void printReturnValStr(const MachineFunction &MF, raw_ostream &O);    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,                         unsigned AsmVariant, const char *ExtraCode, -                       raw_ostream &); +                       raw_ostream &) override;    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, -                    const char *Modifier = 0); +                    const char *Modifier = nullptr);    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,                               unsigned AsmVariant, const char *ExtraCode, -                             raw_ostream &); +                             raw_ostream &) override;  protected: -  bool doInitialization(Module &M); -  bool doFinalization(Module &M); +  bool doInitialization(Module &M) override; +  bool doFinalization(Module &M) override;  private:    std::string CurrentBankselLabelInBasicBlock; @@ -274,14 +287,33 @@ private:    static const char *getRegisterName(unsigned RegNo);    void emitDemotedVars(const Function *, raw_ostream &); +  bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, +                               MCOperand &MCOp); +  void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp); +    LineReader *reader;    LineReader *getReader(std::string); + +  // Used to control the need to emit .generic() in the initializer of +  // module scope variables. +  // Although ptx supports the hybrid mode like the following, +  //    .global .u32 a; +  //    .global .u32 b; +  //    .global .u32 addr[] = {a, generic(b)} +  // we have difficulty representing the difference in the NVVM IR. +  // +  // Since the address value should always be generic in CUDA C and always +  // be specific in OpenCL, we use this simple control here. 
+  // +  bool EmitGeneric; +  public:    NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)        : AsmPrinter(TM, Streamer),          nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {      CurrentBankselLabelInBasicBlock = ""; -    reader = NULL; +    reader = nullptr; +    EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);    }    ~NVPTXAsmPrinter() { diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp index 158c482..962b123 100644 --- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp +++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp @@ -33,7 +33,7 @@ public:    static char ID;    NVPTXAssignValidGlobalNames() : ModulePass(ID) {} -  virtual bool runOnModule(Module &M); +  bool runOnModule(Module &M) override;    /// \brief Clean up the name to remove symbols invalid in PTX.    std::string cleanUpName(StringRef Name); diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp new file mode 100644 index 0000000..f3a095d --- /dev/null +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -0,0 +1,195 @@ +//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// When a load/store accesses the generic address space, checks whether the +// address is casted from a non-generic address space. If so, remove this +// addrspacecast because accessing non-generic address spaces is typically +// faster. Besides seeking addrspacecasts, this optimization also traces into +// the base pointer of a GEP. +// +// For instance, the code below loads a float from an array allocated in +// addrspace(3). 
+// +// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* +// %1 = gep [10 x float]* %0, i64 0, i64 %i +// %2 = load float* %1 ; emits ld.f32 +// +// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast +// and the GEP to expose more optimization opportunities to function +// optimizeMemoryInst. The intermediate code looks like: +// +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = addrspacecast float addrspace(3)* %0 to float* +// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly +// +// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed +// generic pointers, and folds the load and the addrspacecast into a load from +// the original address space. The final code looks like: +// +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 +// +// This pass may remove an addrspacecast in a different BB. Therefore, we +// implement it as a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +// An option to disable this optimization. Enable it by default. +static cl::opt<bool> DisableFavorNonGeneric( +  "disable-nvptx-favor-non-generic", +  cl::init(false), +  cl::desc("Do not convert generic address space usage " +           "to non-generic address space usage"), +  cl::Hidden); + +namespace { +/// \brief NVPTXFavorNonGenericAddrSpaces +class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { +public: +  static char ID; +  NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} + +  bool runOnFunction(Function &F) override; + +  /// Optimizes load/store instructions. Idx is the index of the pointer operand +  /// (0 for load, and 1 for store). Returns true if it changes anything. 
+  bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); +  /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, +  /// indices)".  This reordering exposes to optimizeMemoryInstruction more +  /// optimization opportunities on loads and stores. Returns true if it changes +  /// the program. +  bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); +}; +} + +char NVPTXFavorNonGenericAddrSpaces::ID = 0; + +namespace llvm { +void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +} +INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", +                "Remove unnecessary non-generic-to-generic addrspacecasts", +                false, false) + +// Decides whether removing Cast is valid and beneficial. Cast can be an +// instruction or a constant expression. +static bool IsEliminableAddrSpaceCast(Operator *Cast) { +  // Returns false if not even an addrspacecast. +  if (Cast->getOpcode() != Instruction::AddrSpaceCast) +    return false; + +  Value *Src = Cast->getOperand(0); +  PointerType *SrcTy = cast<PointerType>(Src->getType()); +  PointerType *DestTy = cast<PointerType>(Cast->getType()); +  // TODO: For now, we only handle the case where the addrspacecast only changes +  // the address space but not the type. If the type also changes, we could +  // still get rid of the addrspacecast by adding an extra bitcast, but we +  // rarely see such scenarios. +  if (SrcTy->getElementType() != DestTy->getElementType()) +    return false; + +  // Checks whether the addrspacecast is from a non-generic address space to the +  // generic address space. 
+  return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC && +          DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); +} + +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( +    GEPOperator *GEP) { +  Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); +  if (!Cast) +    return false; + +  if (!IsEliminableAddrSpaceCast(Cast)) +    return false; + +  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); +  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { +    // %1 = gep (addrspacecast X), indices +    // => +    // %0 = gep X, indices +    // %1 = addrspacecast %0 +    GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), +                                                           Indices, +                                                           GEP->getName(), +                                                           GEPI); +    NewGEPI->setIsInBounds(GEP->isInBounds()); +    GEP->replaceAllUsesWith( +        new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); +  } else { +    // GEP is a constant expression. +    Constant *NewGEPCE = ConstantExpr::getGetElementPtr( +        cast<Constant>(Cast->getOperand(0)), +        Indices, +        GEP->isInBounds()); +    GEP->replaceAllUsesWith( +        ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); +  } + +  return true; +} + +bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, +                                                               unsigned Idx) { +  // If the pointer operand is a GEP, hoist the addrspacecast if any from the +  // GEP to expose more optimization opportunites. +  if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { +    hoistAddrSpaceCastFromGEP(GEP); +  } + +  // load/store (addrspacecast X) => load/store X if shortcutting the +  // addrspacecast is valid and can improve performance. 
+  // +  // e.g., +  // %1 = addrspacecast float addrspace(3)* %0 to float* +  // %2 = load float* %1 +  // -> +  // %2 = load float addrspace(3)* %0 +  // +  // Note: the addrspacecast can also be a constant expression. +  if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { +    if (IsEliminableAddrSpaceCast(Cast)) { +      MI->setOperand(Idx, Cast->getOperand(0)); +      return true; +    } +  } + +  return false; +} + +bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { +  if (DisableFavorNonGeneric) +    return false; + +  bool Changed = false; +  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { +    for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { +      if (isa<LoadInst>(I)) { +        // V = load P +        Changed |= optimizeMemoryInstruction(I, 0); +      } else if (isa<StoreInst>(I)) { +        // store V, P +        Changed |= optimizeMemoryInstruction(I, 1); +      } +    } +  } +  return Changed; +} + +FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() { +  return new NVPTXFavorNonGenericAddrSpaces(); +} diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 819f1dd..2ae6d72 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -28,13 +28,13 @@ public:        : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),          is64bit(_is64bit) {} -  virtual bool hasFP(const MachineFunction &MF) const; -  virtual void emitPrologue(MachineFunction &MF) const; -  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; +  bool hasFP(const MachineFunction &MF) const override; +  void emitPrologue(MachineFunction &MF) const override; +  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;    void eliminateCallFramePseudoInstr(MachineFunction &MF, -                                     MachineBasicBlock &MBB, -                         
            MachineBasicBlock::iterator I) const; +                                  MachineBasicBlock &MBB, +                                  MachineBasicBlock::iterator I) const override;  };  } // End llvm namespace diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 45f0734..023dd5e 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -40,10 +40,9 @@ public:    GenericToNVVM() : ModulePass(ID) {} -  virtual bool runOnModule(Module &M); +  bool runOnModule(Module &M) override; -  virtual void getAnalysisUsage(AnalysisUsage &AU) const { -  } +  void getAnalysisUsage(AnalysisUsage &AU) const override {}  private:    Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, @@ -88,7 +87,8 @@ bool GenericToNVVM::runOnModule(Module &M) {          !GV->getName().startswith("llvm.")) {        GlobalVariable *NewGV = new GlobalVariable(            M, GV->getType()->getElementType(), GV->isConstant(), -          GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL, +          GV->getLinkage(), +          GV->hasInitializer() ? GV->getInitializer() : nullptr,            "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);        NewGV->copyAttributesFrom(GV);        GVMap[GV] = NewGV; @@ -162,7 +162,7 @@ Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,                                        GlobalVariable *GV,                                        IRBuilder<> &Builder) {    PointerType *GVType = GV->getType(); -  Value *CVTA = NULL; +  Value *CVTA = nullptr;    // See if the address space conversion requires the operand to be bitcast    // to i8 addrspace(n)* first. 
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index bd08d2d..cd30880 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -20,11 +20,10 @@  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetIntrinsicInfo.h" -#undef DEBUG_TYPE -#define DEBUG_TYPE "nvptx-isel" -  using namespace llvm; +#define DEBUG_TYPE "nvptx-isel" +  static cl::opt<int>  FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,                   cl::desc("NVPTX Specific: FMA contraction (0: don't do it" @@ -120,10 +119,10 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {    if (N->isMachineOpcode()) {      N->setNodeId(-1); -    return NULL; // Already selected. +    return nullptr; // Already selected.    } -  SDNode *ResNode = NULL; +  SDNode *ResNode = nullptr;    switch (N->getOpcode()) {    case ISD::LOAD:      ResNode = SelectLoad(N); @@ -162,6 +161,98 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {    case NVPTXISD::StoreParamU32:      ResNode = SelectStoreParam(N);      break; +  case ISD::INTRINSIC_WO_CHAIN: +    ResNode = SelectIntrinsicNoChain(N); +    break; +  case NVPTXISD::Tex1DFloatI32: +  case NVPTXISD::Tex1DFloatFloat: +  case NVPTXISD::Tex1DFloatFloatLevel: +  case NVPTXISD::Tex1DFloatFloatGrad: +  case NVPTXISD::Tex1DI32I32: +  case NVPTXISD::Tex1DI32Float: +  case NVPTXISD::Tex1DI32FloatLevel: +  case NVPTXISD::Tex1DI32FloatGrad: +  case NVPTXISD::Tex1DArrayFloatI32: +  case NVPTXISD::Tex1DArrayFloatFloat: +  case NVPTXISD::Tex1DArrayFloatFloatLevel: +  case NVPTXISD::Tex1DArrayFloatFloatGrad: +  case NVPTXISD::Tex1DArrayI32I32: +  case NVPTXISD::Tex1DArrayI32Float: +  case NVPTXISD::Tex1DArrayI32FloatLevel: +  case NVPTXISD::Tex1DArrayI32FloatGrad: +  case NVPTXISD::Tex2DFloatI32: +  case NVPTXISD::Tex2DFloatFloat: +  case NVPTXISD::Tex2DFloatFloatLevel: +  case NVPTXISD::Tex2DFloatFloatGrad: +  case NVPTXISD::Tex2DI32I32: +  case NVPTXISD::Tex2DI32Float: +  
case NVPTXISD::Tex2DI32FloatLevel: +  case NVPTXISD::Tex2DI32FloatGrad: +  case NVPTXISD::Tex2DArrayFloatI32: +  case NVPTXISD::Tex2DArrayFloatFloat: +  case NVPTXISD::Tex2DArrayFloatFloatLevel: +  case NVPTXISD::Tex2DArrayFloatFloatGrad: +  case NVPTXISD::Tex2DArrayI32I32: +  case NVPTXISD::Tex2DArrayI32Float: +  case NVPTXISD::Tex2DArrayI32FloatLevel: +  case NVPTXISD::Tex2DArrayI32FloatGrad: +  case NVPTXISD::Tex3DFloatI32: +  case NVPTXISD::Tex3DFloatFloat: +  case NVPTXISD::Tex3DFloatFloatLevel: +  case NVPTXISD::Tex3DFloatFloatGrad: +  case NVPTXISD::Tex3DI32I32: +  case NVPTXISD::Tex3DI32Float: +  case NVPTXISD::Tex3DI32FloatLevel: +  case NVPTXISD::Tex3DI32FloatGrad: +    ResNode = SelectTextureIntrinsic(N); +    break; +  case NVPTXISD::Suld1DI8Trap: +  case NVPTXISD::Suld1DI16Trap: +  case NVPTXISD::Suld1DI32Trap: +  case NVPTXISD::Suld1DV2I8Trap: +  case NVPTXISD::Suld1DV2I16Trap: +  case NVPTXISD::Suld1DV2I32Trap: +  case NVPTXISD::Suld1DV4I8Trap: +  case NVPTXISD::Suld1DV4I16Trap: +  case NVPTXISD::Suld1DV4I32Trap: +  case NVPTXISD::Suld1DArrayI8Trap: +  case NVPTXISD::Suld1DArrayI16Trap: +  case NVPTXISD::Suld1DArrayI32Trap: +  case NVPTXISD::Suld1DArrayV2I8Trap: +  case NVPTXISD::Suld1DArrayV2I16Trap: +  case NVPTXISD::Suld1DArrayV2I32Trap: +  case NVPTXISD::Suld1DArrayV4I8Trap: +  case NVPTXISD::Suld1DArrayV4I16Trap: +  case NVPTXISD::Suld1DArrayV4I32Trap: +  case NVPTXISD::Suld2DI8Trap: +  case NVPTXISD::Suld2DI16Trap: +  case NVPTXISD::Suld2DI32Trap: +  case NVPTXISD::Suld2DV2I8Trap: +  case NVPTXISD::Suld2DV2I16Trap: +  case NVPTXISD::Suld2DV2I32Trap: +  case NVPTXISD::Suld2DV4I8Trap: +  case NVPTXISD::Suld2DV4I16Trap: +  case NVPTXISD::Suld2DV4I32Trap: +  case NVPTXISD::Suld2DArrayI8Trap: +  case NVPTXISD::Suld2DArrayI16Trap: +  case NVPTXISD::Suld2DArrayI32Trap: +  case NVPTXISD::Suld2DArrayV2I8Trap: +  case NVPTXISD::Suld2DArrayV2I16Trap: +  case NVPTXISD::Suld2DArrayV2I32Trap: +  case NVPTXISD::Suld2DArrayV4I8Trap: +  case 
NVPTXISD::Suld2DArrayV4I16Trap: +  case NVPTXISD::Suld2DArrayV4I32Trap: +  case NVPTXISD::Suld3DI8Trap: +  case NVPTXISD::Suld3DI16Trap: +  case NVPTXISD::Suld3DI32Trap: +  case NVPTXISD::Suld3DV2I8Trap: +  case NVPTXISD::Suld3DV2I16Trap: +  case NVPTXISD::Suld3DV2I32Trap: +  case NVPTXISD::Suld3DV4I8Trap: +  case NVPTXISD::Suld3DV4I16Trap: +  case NVPTXISD::Suld3DV4I32Trap: +    ResNode = SelectSurfaceIntrinsic(N); +    break;    case ISD::ADDRSPACECAST:      ResNode = SelectAddrSpaceCast(N);      break; @@ -175,7 +266,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {  static unsigned int getCodeAddrSpace(MemSDNode *N,                                       const NVPTXSubtarget &Subtarget) { -  const Value *Src = N->getSrcValue(); +  const Value *Src = N->getMemOperand()->getValue();    if (!Src)      return NVPTX::PTXLdStInstCode::GENERIC; @@ -194,6 +285,24 @@ static unsigned int getCodeAddrSpace(MemSDNode *N,    return NVPTX::PTXLdStInstCode::GENERIC;  } +SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { +  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); +  switch (IID) { +  default: +    return nullptr; +  case Intrinsic::nvvm_texsurf_handle_internal: +    return SelectTexSurfHandle(N); +  } +} + +SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { +  // Op 0 is the intrinsic ID +  SDValue Wrapper = N->getOperand(1); +  SDValue GlobalVal = Wrapper.getOperand(0); +  return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, +                                GlobalVal); +} +  SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {    SDValue Src = N->getOperand(0);    AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); @@ -258,14 +367,14 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {    SDLoc dl(N);    LoadSDNode *LD = cast<LoadSDNode>(N);    EVT LoadedVT = LD->getMemoryVT(); -  SDNode *NVPTXLD = NULL; +  SDNode *NVPTXLD = nullptr;    // do not support pre/post inc/dec    if 
(LD->isIndexed()) -    return NULL; +    return nullptr;    if (!LoadedVT.isSimple()) -    return NULL; +    return nullptr;    // Address Space Setting    unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); @@ -288,7 +397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {      else if (num == 4)        vecType = NVPTX::PTXLdStInstCode::V4;      else -      return NULL; +      return nullptr;    }    // Type Setting: fromType + fromTypeWidth @@ -337,7 +446,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {        Opcode = NVPTX::LD_f64_avar;        break;      default: -      return NULL; +      return nullptr;      }      SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),                        getI32Imm(vecType), getI32Imm(fromType), @@ -366,7 +475,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {        Opcode = NVPTX::LD_f64_asi;        break;      default: -      return NULL; +      return nullptr;      }      SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),                        getI32Imm(vecType), getI32Imm(fromType), @@ -396,7 +505,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {          Opcode = NVPTX::LD_f64_ari_64;          break;        default: -        return NULL; +        return nullptr;        }      } else {        switch (TargetVT) { @@ -419,7 +528,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {          Opcode = NVPTX::LD_f64_ari;          break;        default: -        return NULL; +        return nullptr;        }      }      SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -448,7 +557,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {          Opcode = NVPTX::LD_f64_areg_64;          break;        default: -        return NULL; +        return nullptr;        }      } else {        switch (TargetVT) { @@ -471,7 +580,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {          Opcode = NVPTX::LD_f64_areg;          break;        default: 
-        return NULL; +        return nullptr;        }      }      SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -480,7 +589,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {      NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);    } -  if (NVPTXLD != NULL) { +  if (NVPTXLD) {      MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);      MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();      cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -501,7 +610,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {    EVT LoadedVT = MemSD->getMemoryVT();    if (!LoadedVT.isSimple()) -    return NULL; +    return nullptr;    // Address Space Setting    unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); @@ -547,7 +656,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      VecType = NVPTX::PTXLdStInstCode::V4;      break;    default: -    return NULL; +    return nullptr;    }    EVT EltVT = N->getValueType(0); @@ -555,11 +664,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {    if (SelectDirectAddr(Op1, Addr)) {      switch (N->getOpcode()) {      default: -      return NULL; +      return nullptr;      case NVPTXISD::LoadV2:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::LDV_i8_v2_avar;          break; @@ -583,7 +692,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      case NVPTXISD::LoadV4:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::LDV_i8_v4_avar;          break; @@ -609,11 +718,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {                   : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {      switch (N->getOpcode()) {      default: -      return NULL; +      return nullptr;      case 
NVPTXISD::LoadV2:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::LDV_i8_v2_asi;          break; @@ -637,7 +746,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      case NVPTXISD::LoadV4:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::LDV_i8_v4_asi;          break; @@ -664,11 +773,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      if (Subtarget.is64Bit()) {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LoadV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v2_ari_64;            break; @@ -692,7 +801,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {        case NVPTXISD::LoadV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v4_ari_64;            break; @@ -711,11 +820,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      } else {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LoadV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v2_ari;            break; @@ -739,7 +848,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {        case NVPTXISD::LoadV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v4_ari;            break; @@ -766,11 +875,11 @@ 
SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      if (Subtarget.is64Bit()) {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LoadV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v2_areg_64;            break; @@ -794,7 +903,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {        case NVPTXISD::LoadV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v4_areg_64;            break; @@ -813,11 +922,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {      } else {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LoadV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v2_areg;            break; @@ -841,7 +950,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {        case NVPTXISD::LoadV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::LDV_i8_v4_areg;            break; @@ -887,11 +996,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {    if (SelectDirectAddr(Op1, Addr)) {      switch (N->getOpcode()) {      default: -      return NULL; +      return nullptr;      case NVPTXISD::LDGV2:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;          break; @@ -915,7 +1024,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      case 
NVPTXISD::LDUV2:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;          break; @@ -939,7 +1048,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      case NVPTXISD::LDGV4:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;          break; @@ -957,7 +1066,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      case NVPTXISD::LDUV4:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;          break; @@ -975,19 +1084,18 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      }      SDValue Ops[] = { Addr, Chain }; -    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), -                                ArrayRef<SDValue>(Ops, 2)); +    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);    } else if (Subtarget.is64Bit()                   ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset)                   : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {      if (Subtarget.is64Bit()) {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LDGV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;            break; @@ -1011,7 +1119,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;            break; @@ -1035,7 +1143,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDGV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;            break; @@ -1053,7 +1161,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;            break; @@ -1072,11 +1180,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      } else {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LDGV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;            break; @@ -1100,7 +1208,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        
case NVPTXISD::LDUV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;            break; @@ -1124,7 +1232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDGV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;            break; @@ -1142,7 +1250,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;            break; @@ -1162,17 +1270,16 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      SDValue Ops[] = { Base, Offset, Chain }; -    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), -                                ArrayRef<SDValue>(Ops, 3)); +    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);    } else {      if (Subtarget.is64Bit()) {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LDGV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;            break; @@ -1196,7 +1303,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;            break; @@ -1220,7 +1327,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) 
{        case NVPTXISD::LDGV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;            break; @@ -1238,7 +1345,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;            break; @@ -1257,11 +1364,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      } else {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::LDGV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;            break; @@ -1285,7 +1392,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;            break; @@ -1309,7 +1416,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDGV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;            break; @@ -1327,7 +1434,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {        case NVPTXISD::LDUV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;            break; @@ 
-1346,8 +1453,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {      }      SDValue Ops[] = { Op1, Chain }; -    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), -                                ArrayRef<SDValue>(Ops, 2)); +    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);    }    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); @@ -1361,14 +1467,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {    SDLoc dl(N);    StoreSDNode *ST = cast<StoreSDNode>(N);    EVT StoreVT = ST->getMemoryVT(); -  SDNode *NVPTXST = NULL; +  SDNode *NVPTXST = nullptr;    // do not support pre/post inc/dec    if (ST->isIndexed()) -    return NULL; +    return nullptr;    if (!StoreVT.isSimple()) -    return NULL; +    return nullptr;    // Address Space Setting    unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget); @@ -1391,7 +1497,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {      else if (num == 4)        vecType = NVPTX::PTXLdStInstCode::V4;      else -      return NULL; +      return nullptr;    }    // Type Setting: toType + toTypeWidth @@ -1435,7 +1541,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {        Opcode = NVPTX::ST_f64_avar;        break;      default: -      return NULL; +      return nullptr;      }      SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),                        getI32Imm(vecType), getI32Imm(toType), @@ -1464,7 +1570,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {        Opcode = NVPTX::ST_f64_asi;        break;      default: -      return NULL; +      return nullptr;      }      SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),                        getI32Imm(vecType), getI32Imm(toType), @@ -1494,7 +1600,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {          Opcode = NVPTX::ST_f64_ari_64;          break;        default: -        return NULL; +        return nullptr;        }      } else {        switch 
(SourceVT) { @@ -1517,7 +1623,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {          Opcode = NVPTX::ST_f64_ari;          break;        default: -        return NULL; +        return nullptr;        }      }      SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -1546,7 +1652,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {          Opcode = NVPTX::ST_f64_areg_64;          break;        default: -        return NULL; +        return nullptr;        }      } else {        switch (SourceVT) { @@ -1569,7 +1675,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {          Opcode = NVPTX::ST_f64_areg;          break;        default: -        return NULL; +        return nullptr;        }      }      SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -1578,7 +1684,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {      NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);    } -  if (NVPTXST != NULL) { +  if (NVPTXST) {      MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);      MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();      cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -1645,7 +1751,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      N2 = N->getOperand(5);      break;    default: -    return NULL; +    return nullptr;    }    StOps.push_back(getI32Imm(IsVolatile)); @@ -1657,11 +1763,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {    if (SelectDirectAddr(N2, Addr)) {      switch (N->getOpcode()) {      default: -      return NULL; +      return nullptr;      case NVPTXISD::StoreV2:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::STV_i8_v2_avar;          break; @@ -1685,7 +1791,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      case NVPTXISD::StoreV4:        switch 
(EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::STV_i8_v4_avar;          break; @@ -1707,11 +1813,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {                   : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {      switch (N->getOpcode()) {      default: -      return NULL; +      return nullptr;      case NVPTXISD::StoreV2:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::STV_i8_v2_asi;          break; @@ -1735,7 +1841,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      case NVPTXISD::StoreV4:        switch (EltVT.getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i8:          Opcode = NVPTX::STV_i8_v4_asi;          break; @@ -1759,11 +1865,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      if (Subtarget.is64Bit()) {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::StoreV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v2_ari_64;            break; @@ -1787,7 +1893,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {        case NVPTXISD::StoreV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v4_ari_64;            break; @@ -1806,11 +1912,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      } else {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::StoreV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; + 
         return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v2_ari;            break; @@ -1834,7 +1940,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {        case NVPTXISD::StoreV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v4_ari;            break; @@ -1857,11 +1963,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      if (Subtarget.is64Bit()) {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::StoreV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v2_areg_64;            break; @@ -1885,7 +1991,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {        case NVPTXISD::StoreV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v4_areg_64;            break; @@ -1904,11 +2010,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {      } else {        switch (N->getOpcode()) {        default: -        return NULL; +        return nullptr;        case NVPTXISD::StoreV2:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v2_areg;            break; @@ -1932,7 +2038,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {        case NVPTXISD::StoreV4:          switch (EltVT.getSimpleVT().SimpleTy) {          default: -          return NULL; +          return nullptr;          case MVT::i8:            Opcode = NVPTX::STV_i8_v4_areg;            break; @@ -1973,7 +2079,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) 
{    unsigned VecSize;    switch (Node->getOpcode()) {    default: -    return NULL; +    return nullptr;    case NVPTXISD::LoadParam:      VecSize = 1;      break; @@ -1992,11 +2098,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {    switch (VecSize) {    default: -    return NULL; +    return nullptr;    case 1:      switch (MemVT.getSimpleVT().SimpleTy) {      default: -      return NULL; +      return nullptr;      case MVT::i1:        Opc = NVPTX::LoadParamMemI8;        break; @@ -2023,7 +2129,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {    case 2:      switch (MemVT.getSimpleVT().SimpleTy) {      default: -      return NULL; +      return nullptr;      case MVT::i1:        Opc = NVPTX::LoadParamMemV2I8;        break; @@ -2050,7 +2156,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {    case 4:      switch (MemVT.getSimpleVT().SimpleTy) {      default: -      return NULL; +      return nullptr;      case MVT::i1:        Opc = NVPTX::LoadParamMemV4I8;        break; @@ -2077,7 +2183,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {      VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);    } else {      EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; -    VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs)); +    VTs = CurDAG->getVTList(EVTs);    }    unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); @@ -2103,7 +2209,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {    unsigned NumElts = 1;    switch (N->getOpcode()) {    default: -    return NULL; +    return nullptr;    case NVPTXISD::StoreRetval:      NumElts = 1;      break; @@ -2128,11 +2234,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {    unsigned Opcode = 0;    switch (NumElts) {    default: -    return NULL; +    return nullptr;    case 1:      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {      default: -      return NULL; +      return nullptr;      case 
MVT::i1:        Opcode = NVPTX::StoreRetvalI8;        break; @@ -2159,7 +2265,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {    case 2:      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {      default: -      return NULL; +      return nullptr;      case MVT::i1:        Opcode = NVPTX::StoreRetvalV2I8;        break; @@ -2186,7 +2292,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {    case 4:      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {      default: -      return NULL; +      return nullptr;      case MVT::i1:        Opcode = NVPTX::StoreRetvalV4I8;        break; @@ -2229,7 +2335,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {    unsigned NumElts = 1;    switch (N->getOpcode()) {    default: -    return NULL; +    return nullptr;    case NVPTXISD::StoreParamU32:    case NVPTXISD::StoreParamS32:    case NVPTXISD::StoreParam: @@ -2260,11 +2366,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {    default:      switch (NumElts) {      default: -      return NULL; +      return nullptr;      case 1:        switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i1:          Opcode = NVPTX::StoreParamI8;          break; @@ -2291,7 +2397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {      case 2:        switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i1:          Opcode = NVPTX::StoreParamV2I8;          break; @@ -2318,7 +2424,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {      case 4:        switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {        default: -        return NULL; +        return nullptr;        case MVT::i1:          Opcode = NVPTX::StoreParamV4I8;          break; @@ -2371,6 +2477,488 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {    return Ret;  } +SDNode 
*NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { +  SDValue Chain = N->getOperand(0); +  SDValue TexRef = N->getOperand(1); +  SDValue SampRef = N->getOperand(2); +  SDNode *Ret = nullptr; +  unsigned Opc = 0; +  SmallVector<SDValue, 8> Ops; + +  switch (N->getOpcode()) { +  default: return nullptr; +  case NVPTXISD::Tex1DFloatI32: +    Opc = NVPTX::TEX_1D_F32_I32; +    break; +  case NVPTXISD::Tex1DFloatFloat: +    Opc = NVPTX::TEX_1D_F32_F32; +    break; +  case NVPTXISD::Tex1DFloatFloatLevel: +    Opc = NVPTX::TEX_1D_F32_F32_LEVEL; +    break; +  case NVPTXISD::Tex1DFloatFloatGrad: +    Opc = NVPTX::TEX_1D_F32_F32_GRAD; +    break; +  case NVPTXISD::Tex1DI32I32: +    Opc = NVPTX::TEX_1D_I32_I32; +    break; +  case NVPTXISD::Tex1DI32Float: +    Opc = NVPTX::TEX_1D_I32_F32; +    break; +  case NVPTXISD::Tex1DI32FloatLevel: +    Opc = NVPTX::TEX_1D_I32_F32_LEVEL; +    break; +  case NVPTXISD::Tex1DI32FloatGrad: +    Opc = NVPTX::TEX_1D_I32_F32_GRAD; +    break; +  case NVPTXISD::Tex1DArrayFloatI32: +    Opc = NVPTX::TEX_1D_ARRAY_F32_I32; +    break; +  case NVPTXISD::Tex1DArrayFloatFloat: +    Opc = NVPTX::TEX_1D_ARRAY_F32_F32; +    break; +  case NVPTXISD::Tex1DArrayFloatFloatLevel: +    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; +    break; +  case NVPTXISD::Tex1DArrayFloatFloatGrad: +    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; +    break; +  case NVPTXISD::Tex1DArrayI32I32: +    Opc = NVPTX::TEX_1D_ARRAY_I32_I32; +    break; +  case NVPTXISD::Tex1DArrayI32Float: +    Opc = NVPTX::TEX_1D_ARRAY_I32_F32; +    break; +  case NVPTXISD::Tex1DArrayI32FloatLevel: +    Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; +    break; +  case NVPTXISD::Tex1DArrayI32FloatGrad: +    Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; +    break; +  case NVPTXISD::Tex2DFloatI32: +    Opc = NVPTX::TEX_2D_F32_I32; +    break; +  case NVPTXISD::Tex2DFloatFloat: +    Opc = NVPTX::TEX_2D_F32_F32; +    break; +  case NVPTXISD::Tex2DFloatFloatLevel: +    Opc = NVPTX::TEX_2D_F32_F32_LEVEL; +    break; 
+  case NVPTXISD::Tex2DFloatFloatGrad: +    Opc = NVPTX::TEX_2D_F32_F32_GRAD; +    break; +  case NVPTXISD::Tex2DI32I32: +    Opc = NVPTX::TEX_2D_I32_I32; +    break; +  case NVPTXISD::Tex2DI32Float: +    Opc = NVPTX::TEX_2D_I32_F32; +    break; +  case NVPTXISD::Tex2DI32FloatLevel: +    Opc = NVPTX::TEX_2D_I32_F32_LEVEL; +    break; +  case NVPTXISD::Tex2DI32FloatGrad: +    Opc = NVPTX::TEX_2D_I32_F32_GRAD; +    break; +  case NVPTXISD::Tex2DArrayFloatI32: +    Opc = NVPTX::TEX_2D_ARRAY_F32_I32; +    break; +  case NVPTXISD::Tex2DArrayFloatFloat: +    Opc = NVPTX::TEX_2D_ARRAY_F32_F32; +    break; +  case NVPTXISD::Tex2DArrayFloatFloatLevel: +    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; +    break; +  case NVPTXISD::Tex2DArrayFloatFloatGrad: +    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; +    break; +  case NVPTXISD::Tex2DArrayI32I32: +    Opc = NVPTX::TEX_2D_ARRAY_I32_I32; +    break; +  case NVPTXISD::Tex2DArrayI32Float: +    Opc = NVPTX::TEX_2D_ARRAY_I32_F32; +    break; +  case NVPTXISD::Tex2DArrayI32FloatLevel: +    Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; +    break; +  case NVPTXISD::Tex2DArrayI32FloatGrad: +    Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; +    break; +  case NVPTXISD::Tex3DFloatI32: +    Opc = NVPTX::TEX_3D_F32_I32; +    break; +  case NVPTXISD::Tex3DFloatFloat: +    Opc = NVPTX::TEX_3D_F32_F32; +    break; +  case NVPTXISD::Tex3DFloatFloatLevel: +    Opc = NVPTX::TEX_3D_F32_F32_LEVEL; +    break; +  case NVPTXISD::Tex3DFloatFloatGrad: +    Opc = NVPTX::TEX_3D_F32_F32_GRAD; +    break; +  case NVPTXISD::Tex3DI32I32: +    Opc = NVPTX::TEX_3D_I32_I32; +    break; +  case NVPTXISD::Tex3DI32Float: +    Opc = NVPTX::TEX_3D_I32_F32; +    break; +  case NVPTXISD::Tex3DI32FloatLevel: +    Opc = NVPTX::TEX_3D_I32_F32_LEVEL; +    break; +  case NVPTXISD::Tex3DI32FloatGrad: +    Opc = NVPTX::TEX_3D_I32_F32_GRAD; +    break; +  } + +  Ops.push_back(TexRef); +  Ops.push_back(SampRef); + +  // Copy over indices +  for (unsigned i = 3; i < N->getNumOperands(); 
++i) { +    Ops.push_back(N->getOperand(i)); +  } + +  Ops.push_back(Chain); +  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); +  return Ret; +} + +SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { +  SDValue Chain = N->getOperand(0); +  SDValue TexHandle = N->getOperand(1); +  SDNode *Ret = nullptr; +  unsigned Opc = 0; +  SmallVector<SDValue, 8> Ops; +  switch (N->getOpcode()) { +  default: return nullptr; +  case NVPTXISD::Suld1DI8Trap: +    Opc = NVPTX::SULD_1D_I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DI16Trap: +    Opc = NVPTX::SULD_1D_I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DI32Trap: +    Opc = NVPTX::SULD_1D_I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DV2I8Trap: +    Opc = NVPTX::SULD_1D_V2I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DV2I16Trap: +    Opc = NVPTX::SULD_1D_V2I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DV2I32Trap: +    Opc = NVPTX::SULD_1D_V2I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DV4I8Trap: +    Opc = NVPTX::SULD_1D_V4I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DV4I16Trap: +    Opc = NVPTX::SULD_1D_V4I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DV4I32Trap: +    Opc = NVPTX::SULD_1D_V4I32_TRAP; +    Ops.push_back(TexHandle); +    
Ops.push_back(N->getOperand(2)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayI8Trap: +    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayI16Trap: +    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayI32Trap: +    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayV2I8Trap: +    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayV2I16Trap: +    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayV2I32Trap: +    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayV4I8Trap: +    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayV4I16Trap: +    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld1DArrayV4I32Trap: +    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; +    
Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DI8Trap: +    Opc = NVPTX::SULD_2D_I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DI16Trap: +    Opc = NVPTX::SULD_2D_I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DI32Trap: +    Opc = NVPTX::SULD_2D_I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DV2I8Trap: +    Opc = NVPTX::SULD_2D_V2I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DV2I16Trap: +    Opc = NVPTX::SULD_2D_V2I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DV2I32Trap: +    Opc = NVPTX::SULD_2D_V2I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DV4I8Trap: +    Opc = NVPTX::SULD_2D_V4I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DV4I16Trap: +    Opc = NVPTX::SULD_2D_V4I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DV4I32Trap: +    Opc = NVPTX::SULD_2D_V4I32_TRAP; +    Ops.push_back(TexHandle); +    
Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayI8Trap: +    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayI16Trap: +    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayI32Trap: +    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayV2I8Trap: +    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayV2I16Trap: +    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayV2I32Trap: +    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayV4I8Trap: +    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case 
NVPTXISD::Suld2DArrayV4I16Trap: +    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld2DArrayV4I32Trap: +    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DI8Trap: +    Opc = NVPTX::SULD_3D_I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DI16Trap: +    Opc = NVPTX::SULD_3D_I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DI32Trap: +    Opc = NVPTX::SULD_3D_I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DV2I8Trap: +    Opc = NVPTX::SULD_3D_V2I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DV2I16Trap: +    Opc = NVPTX::SULD_3D_V2I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DV2I32Trap: +    Opc = NVPTX::SULD_3D_V2I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    
Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DV4I8Trap: +    Opc = NVPTX::SULD_3D_V4I8_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DV4I16Trap: +    Opc = NVPTX::SULD_3D_V4I16_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  case NVPTXISD::Suld3DV4I32Trap: +    Opc = NVPTX::SULD_3D_V4I32_TRAP; +    Ops.push_back(TexHandle); +    Ops.push_back(N->getOperand(2)); +    Ops.push_back(N->getOperand(3)); +    Ops.push_back(N->getOperand(4)); +    Ops.push_back(Chain); +    break; +  } +  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); +  return Ret; +} +  // SelectDirectAddr - Match a direct address for DAG.  // A direct address could be a globaladdress or externalsymbol.  bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { @@ -2464,14 +3052,18 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,  bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,                                                   unsigned int spN) const { -  const Value *Src = NULL; +  const Value *Src = nullptr;    // Even though MemIntrinsicSDNode is a subclas of MemSDNode,    // the classof() for MemSDNode does not include MemIntrinsicSDNode    // (See SelectionDAGNodes.h). So we need to check for both.    
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { -    Src = mN->getSrcValue(); +    if (spN == 0 && mN->getMemOperand()->getPseudoValue()) +      return true; +    Src = mN->getMemOperand()->getValue();    } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { -    Src = mN->getSrcValue(); +    if (spN == 0 && mN->getMemOperand()->getPseudoValue()) +      return true; +    Src = mN->getMemOperand()->getValue();    }    if (!Src)      return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 93ad169..11f92e7 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -11,8 +11,6 @@  //  //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "nvptx-isel" -  #include "NVPTX.h"  #include "NVPTXISelLowering.h"  #include "NVPTXRegisterInfo.h" @@ -46,19 +44,22 @@ public:                               CodeGenOpt::Level   OptLevel);    // Pass Name -  virtual const char *getPassName() const { +  const char *getPassName() const override {      return "NVPTX DAG->DAG Pattern Instruction Selection";    }    const NVPTXSubtarget &Subtarget; -  virtual bool SelectInlineAsmMemoryOperand( -      const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); +  bool SelectInlineAsmMemoryOperand(const SDValue &Op, +                                    char ConstraintCode, +                                    std::vector<SDValue> &OutOps) override;  private:  // Include the pieces autogenerated from the target description.  
#include "NVPTXGenDAGISel.inc" -  SDNode *Select(SDNode *N); +  SDNode *Select(SDNode *N) override; +  SDNode *SelectIntrinsicNoChain(SDNode *N); +  SDNode *SelectTexSurfHandle(SDNode *N);    SDNode *SelectLoad(SDNode *N);    SDNode *SelectLoadVector(SDNode *N);    SDNode *SelectLDGLDUVector(SDNode *N); @@ -68,6 +69,8 @@ private:    SDNode *SelectStoreRetval(SDNode *N);    SDNode *SelectStoreParam(SDNode *N);    SDNode *SelectAddrSpaceCast(SDNode *N); +  SDNode *SelectTextureIntrinsic(SDNode *N); +  SDNode *SelectSurfaceIntrinsic(SDNode *N);    inline SDValue getI32Imm(unsigned Imm) {      return CurDAG->getTargetConstant(Imm, MVT::i32); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8e25a65..b0943be 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -75,7 +75,7 @@ static bool IsPTXVectorType(MVT VT) {  /// LowerCall, and LowerReturn.  static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,                                 SmallVectorImpl<EVT> &ValueVTs, -                               SmallVectorImpl<uint64_t> *Offsets = 0, +                               SmallVectorImpl<uint64_t> *Offsets = nullptr,                                 uint64_t StartingOffset = 0) {    SmallVector<EVT, 16> TempVTs;    SmallVector<uint64_t, 16> TempOffsets; @@ -245,7 +245,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)  const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {    switch (Opcode) {    default: -    return 0; +    return nullptr;    case NVPTXISD::CALL:      return "NVPTXISD::CALL";    case NVPTXISD::RET_FLAG: @@ -328,6 +328,116 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {      return "NVPTXISD::StoreV2";    case NVPTXISD::StoreV4:      return "NVPTXISD::StoreV4"; +  case NVPTXISD::Tex1DFloatI32:        return "NVPTXISD::Tex1DFloatI32"; +  case NVPTXISD::Tex1DFloatFloat:      return 
"NVPTXISD::Tex1DFloatFloat"; +  case NVPTXISD::Tex1DFloatFloatLevel: +    return "NVPTXISD::Tex1DFloatFloatLevel"; +  case NVPTXISD::Tex1DFloatFloatGrad: +    return "NVPTXISD::Tex1DFloatFloatGrad"; +  case NVPTXISD::Tex1DI32I32:          return "NVPTXISD::Tex1DI32I32"; +  case NVPTXISD::Tex1DI32Float:        return "NVPTXISD::Tex1DI32Float"; +  case NVPTXISD::Tex1DI32FloatLevel: +    return "NVPTXISD::Tex1DI32FloatLevel"; +  case NVPTXISD::Tex1DI32FloatGrad: +    return "NVPTXISD::Tex1DI32FloatGrad"; +  case NVPTXISD::Tex1DArrayFloatI32:   return "NVPTXISD::Tex1DArrayFloatI32"; +  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; +  case NVPTXISD::Tex1DArrayFloatFloatLevel: +    return "NVPTXISD::Tex1DArrayFloatFloatLevel"; +  case NVPTXISD::Tex1DArrayFloatFloatGrad: +    return "NVPTXISD::Tex1DArrayFloatFloatGrad"; +  case NVPTXISD::Tex1DArrayI32I32:     return "NVPTXISD::Tex1DArrayI32I32"; +  case NVPTXISD::Tex1DArrayI32Float:   return "NVPTXISD::Tex1DArrayI32Float"; +  case NVPTXISD::Tex1DArrayI32FloatLevel: +    return "NVPTXISD::Tex1DArrayI32FloatLevel"; +  case NVPTXISD::Tex1DArrayI32FloatGrad: +    return "NVPTXISD::Tex1DArrayI32FloatGrad"; +  case NVPTXISD::Tex2DFloatI32:        return "NVPTXISD::Tex2DFloatI32"; +  case NVPTXISD::Tex2DFloatFloat:      return "NVPTXISD::Tex2DFloatFloat"; +  case NVPTXISD::Tex2DFloatFloatLevel: +    return "NVPTXISD::Tex2DFloatFloatLevel"; +  case NVPTXISD::Tex2DFloatFloatGrad: +    return "NVPTXISD::Tex2DFloatFloatGrad"; +  case NVPTXISD::Tex2DI32I32:          return "NVPTXISD::Tex2DI32I32"; +  case NVPTXISD::Tex2DI32Float:        return "NVPTXISD::Tex2DI32Float"; +  case NVPTXISD::Tex2DI32FloatLevel: +    return "NVPTXISD::Tex2DI32FloatLevel"; +  case NVPTXISD::Tex2DI32FloatGrad: +    return "NVPTXISD::Tex2DI32FloatGrad"; +  case NVPTXISD::Tex2DArrayFloatI32:   return "NVPTXISD::Tex2DArrayFloatI32"; +  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; +  case 
NVPTXISD::Tex2DArrayFloatFloatLevel: +    return "NVPTXISD::Tex2DArrayFloatFloatLevel"; +  case NVPTXISD::Tex2DArrayFloatFloatGrad: +    return "NVPTXISD::Tex2DArrayFloatFloatGrad"; +  case NVPTXISD::Tex2DArrayI32I32:     return "NVPTXISD::Tex2DArrayI32I32"; +  case NVPTXISD::Tex2DArrayI32Float:   return "NVPTXISD::Tex2DArrayI32Float"; +  case NVPTXISD::Tex2DArrayI32FloatLevel: +    return "NVPTXISD::Tex2DArrayI32FloatLevel"; +  case NVPTXISD::Tex2DArrayI32FloatGrad: +    return "NVPTXISD::Tex2DArrayI32FloatGrad"; +  case NVPTXISD::Tex3DFloatI32:        return "NVPTXISD::Tex3DFloatI32"; +  case NVPTXISD::Tex3DFloatFloat:      return "NVPTXISD::Tex3DFloatFloat"; +  case NVPTXISD::Tex3DFloatFloatLevel: +    return "NVPTXISD::Tex3DFloatFloatLevel"; +  case NVPTXISD::Tex3DFloatFloatGrad: +    return "NVPTXISD::Tex3DFloatFloatGrad"; +  case NVPTXISD::Tex3DI32I32:          return "NVPTXISD::Tex3DI32I32"; +  case NVPTXISD::Tex3DI32Float:        return "NVPTXISD::Tex3DI32Float"; +  case NVPTXISD::Tex3DI32FloatLevel: +    return "NVPTXISD::Tex3DI32FloatLevel"; +  case NVPTXISD::Tex3DI32FloatGrad: +    return "NVPTXISD::Tex3DI32FloatGrad"; + +  case NVPTXISD::Suld1DI8Trap:          return "NVPTXISD::Suld1DI8Trap"; +  case NVPTXISD::Suld1DI16Trap:         return "NVPTXISD::Suld1DI16Trap"; +  case NVPTXISD::Suld1DI32Trap:         return "NVPTXISD::Suld1DI32Trap"; +  case NVPTXISD::Suld1DV2I8Trap:        return "NVPTXISD::Suld1DV2I8Trap"; +  case NVPTXISD::Suld1DV2I16Trap:       return "NVPTXISD::Suld1DV2I16Trap"; +  case NVPTXISD::Suld1DV2I32Trap:       return "NVPTXISD::Suld1DV2I32Trap"; +  case NVPTXISD::Suld1DV4I8Trap:        return "NVPTXISD::Suld1DV4I8Trap"; +  case NVPTXISD::Suld1DV4I16Trap:       return "NVPTXISD::Suld1DV4I16Trap"; +  case NVPTXISD::Suld1DV4I32Trap:       return "NVPTXISD::Suld1DV4I32Trap"; + +  case NVPTXISD::Suld1DArrayI8Trap:     return "NVPTXISD::Suld1DArrayI8Trap"; +  case NVPTXISD::Suld1DArrayI16Trap:    return "NVPTXISD::Suld1DArrayI16Trap"; +  
case NVPTXISD::Suld1DArrayI32Trap:    return "NVPTXISD::Suld1DArrayI32Trap"; +  case NVPTXISD::Suld1DArrayV2I8Trap:   return "NVPTXISD::Suld1DArrayV2I8Trap"; +  case NVPTXISD::Suld1DArrayV2I16Trap:  return "NVPTXISD::Suld1DArrayV2I16Trap"; +  case NVPTXISD::Suld1DArrayV2I32Trap:  return "NVPTXISD::Suld1DArrayV2I32Trap"; +  case NVPTXISD::Suld1DArrayV4I8Trap:   return "NVPTXISD::Suld1DArrayV4I8Trap"; +  case NVPTXISD::Suld1DArrayV4I16Trap:  return "NVPTXISD::Suld1DArrayV4I16Trap"; +  case NVPTXISD::Suld1DArrayV4I32Trap:  return "NVPTXISD::Suld1DArrayV4I32Trap"; + +  case NVPTXISD::Suld2DI8Trap:          return "NVPTXISD::Suld2DI8Trap"; +  case NVPTXISD::Suld2DI16Trap:         return "NVPTXISD::Suld2DI16Trap"; +  case NVPTXISD::Suld2DI32Trap:         return "NVPTXISD::Suld2DI32Trap"; +  case NVPTXISD::Suld2DV2I8Trap:        return "NVPTXISD::Suld2DV2I8Trap"; +  case NVPTXISD::Suld2DV2I16Trap:       return "NVPTXISD::Suld2DV2I16Trap"; +  case NVPTXISD::Suld2DV2I32Trap:       return "NVPTXISD::Suld2DV2I32Trap"; +  case NVPTXISD::Suld2DV4I8Trap:        return "NVPTXISD::Suld2DV4I8Trap"; +  case NVPTXISD::Suld2DV4I16Trap:       return "NVPTXISD::Suld2DV4I16Trap"; +  case NVPTXISD::Suld2DV4I32Trap:       return "NVPTXISD::Suld2DV4I32Trap"; + +  case NVPTXISD::Suld2DArrayI8Trap:     return "NVPTXISD::Suld2DArrayI8Trap"; +  case NVPTXISD::Suld2DArrayI16Trap:    return "NVPTXISD::Suld2DArrayI16Trap"; +  case NVPTXISD::Suld2DArrayI32Trap:    return "NVPTXISD::Suld2DArrayI32Trap"; +  case NVPTXISD::Suld2DArrayV2I8Trap:   return "NVPTXISD::Suld2DArrayV2I8Trap"; +  case NVPTXISD::Suld2DArrayV2I16Trap:  return "NVPTXISD::Suld2DArrayV2I16Trap"; +  case NVPTXISD::Suld2DArrayV2I32Trap:  return "NVPTXISD::Suld2DArrayV2I32Trap"; +  case NVPTXISD::Suld2DArrayV4I8Trap:   return "NVPTXISD::Suld2DArrayV4I8Trap"; +  case NVPTXISD::Suld2DArrayV4I16Trap:  return "NVPTXISD::Suld2DArrayV4I16Trap"; +  case NVPTXISD::Suld2DArrayV4I32Trap:  return "NVPTXISD::Suld2DArrayV4I32Trap"; + +  case 
NVPTXISD::Suld3DI8Trap:          return "NVPTXISD::Suld3DI8Trap"; +  case NVPTXISD::Suld3DI16Trap:         return "NVPTXISD::Suld3DI16Trap"; +  case NVPTXISD::Suld3DI32Trap:         return "NVPTXISD::Suld3DI32Trap"; +  case NVPTXISD::Suld3DV2I8Trap:        return "NVPTXISD::Suld3DV2I8Trap"; +  case NVPTXISD::Suld3DV2I16Trap:       return "NVPTXISD::Suld3DV2I16Trap"; +  case NVPTXISD::Suld3DV2I32Trap:       return "NVPTXISD::Suld3DV2I32Trap"; +  case NVPTXISD::Suld3DV4I8Trap:        return "NVPTXISD::Suld3DV4I8Trap"; +  case NVPTXISD::Suld3DV4I16Trap:       return "NVPTXISD::Suld3DV4I16Trap"; +  case NVPTXISD::Suld3DV4I32Trap:       return "NVPTXISD::Suld3DV4I32Trap";    }  } @@ -526,7 +636,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,    SDValue Chain = CLI.Chain;    SDValue Callee = CLI.Callee;    bool &isTailCall = CLI.IsTailCall; -  ArgListTy &Args = CLI.Args; +  ArgListTy &Args = CLI.getArgs();    Type *retTy = CLI.RetTy;    ImmutableCallSite *CS = CLI.CS; @@ -575,7 +685,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                        DAG.getConstant(paramCount, MVT::i32),                                        DAG.getConstant(sz, MVT::i32), InFlag };          Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, -                            DeclareParamOps, 5); +                            DeclareParamOps);          InFlag = Chain.getValue(1);          unsigned curOffset = 0;          for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { @@ -599,7 +709,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                         DAG.getConstant(curOffset, MVT::i32),                                         StVal, InFlag };              Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, -                                            CopyParamVTs, &CopyParamOps[0], 5, +                                       
     CopyParamVTs, CopyParamOps,                                              elemtype, MachinePointerInfo());              InFlag = Chain.getValue(1);              curOffset += sz / 8; @@ -621,7 +731,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                        DAG.getConstant(paramCount, MVT::i32),                                        DAG.getConstant(sz, MVT::i32), InFlag };          Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, -                            DeclareParamOps, 5); +                            DeclareParamOps);          InFlag = Chain.getValue(1);          unsigned NumElts = ObjectVT.getVectorNumElements();          EVT EltVT = ObjectVT.getVectorElementType(); @@ -644,7 +754,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                       DAG.getConstant(0, MVT::i32), Elt,                                       InFlag };            Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, -                                          CopyParamVTs, &CopyParamOps[0], 5, +                                          CopyParamVTs, CopyParamOps,                                            MemVT, MachinePointerInfo());            InFlag = Chain.getValue(1);          } else if (NumElts == 2) { @@ -661,7 +771,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                       DAG.getConstant(0, MVT::i32), Elt0, Elt1,                                       InFlag };            Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, -                                          CopyParamVTs, &CopyParamOps[0], 6, +                                          CopyParamVTs, CopyParamOps,                                            MemVT, MachinePointerInfo());            InFlag = Chain.getValue(1);          } else { @@ -735,9 +845,8 @@ SDValue 
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,              Ops.push_back(InFlag);              SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); -            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], -                                            Ops.size(), MemVT, -                                            MachinePointerInfo()); +            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, +                                            MemVT, MachinePointerInfo());              InFlag = Chain.getValue(1);              curOffset += PerStoreOffset;            } @@ -762,7 +871,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                      DAG.getConstant(sz, MVT::i32),                                      DAG.getConstant(0, MVT::i32), InFlag };        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, -                          DeclareParamOps, 5); +                          DeclareParamOps);        InFlag = Chain.getValue(1);        SDValue OutV = OutVals[OIdx];        if (needExtend) { @@ -781,7 +890,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,          opcode = NVPTXISD::StoreParamU32;        else if (Outs[OIdx].Flags.isSExt())          opcode = NVPTXISD::StoreParamS32; -      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5, +      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,                                        VT, MachinePointerInfo());        InFlag = Chain.getValue(1); @@ -806,7 +915,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,        InFlag      };      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, -                        DeclareParamOps, 5); +                        DeclareParamOps);      InFlag = Chain.getValue(1);      unsigned curOffset = 0;      for (unsigned j = 
0, je = vtparts.size(); j != je; ++j) { @@ -834,7 +943,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                     DAG.getConstant(curOffset, MVT::i32), theVal,                                     InFlag };          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, -                                        CopyParamOps, 5, elemtype, +                                        CopyParamOps, elemtype,                                          MachinePointerInfo());          InFlag = Chain.getValue(1); @@ -865,7 +974,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                    DAG.getConstant(resultsz, MVT::i32),                                    DAG.getConstant(0, MVT::i32), InFlag };        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, -                          DeclareRetOps, 5); +                          DeclareRetOps);        InFlag = Chain.getValue(1);      } else {        retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); @@ -875,7 +984,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,                                    DAG.getConstant(resultsz / 8, MVT::i32),                                    DAG.getConstant(0, MVT::i32), InFlag };        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, -                          DeclareRetOps, 5); +                          DeclareRetOps);        InFlag = Chain.getValue(1);      }    } @@ -895,7 +1004,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,      SDValue ProtoOps[] = {        Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,      }; -    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3); +    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);      InFlag = Chain.getValue(1);    }    // Op to just print "call" @@ -904,20 
+1013,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,      Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag    };    Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), -                      dl, PrintCallVTs, PrintCallOps, 3); +                      dl, PrintCallVTs, PrintCallOps);    InFlag = Chain.getValue(1);    // Ops to print out the function name    SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);    SDValue CallVoidOps[] = { Chain, Callee, InFlag }; -  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); +  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);    InFlag = Chain.getValue(1);    // Ops to print out the param list    SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);    SDValue CallArgBeginOps[] = { Chain, InFlag };    Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, -                      CallArgBeginOps, 2); +                      CallArgBeginOps);    InFlag = Chain.getValue(1);    for (unsigned i = 0, e = paramCount; i != e; ++i) { @@ -929,21 +1038,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,      SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);      SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),                               DAG.getConstant(i, MVT::i32), InFlag }; -    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); +    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);      InFlag = Chain.getValue(1);    }    SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);    SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 
1 : 0, MVT::i32),                                InFlag }; -  Chain = -      DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); +  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);    InFlag = Chain.getValue(1);    if (!Func) {      SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);      SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),                                 InFlag }; -    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); +    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);      InFlag = Chain.getValue(1);    } @@ -962,7 +1070,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,        if (NumElts == 1) {          // Just a simple load -        std::vector<EVT> LoadRetVTs; +        SmallVector<EVT, 4> LoadRetVTs;          if (needTruncate) {            // If loading i1 result, generate            //   load i16 @@ -972,15 +1080,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,            LoadRetVTs.push_back(EltVT);          LoadRetVTs.push_back(MVT::Other);          LoadRetVTs.push_back(MVT::Glue); -        std::vector<SDValue> LoadRetOps; +        SmallVector<SDValue, 4> LoadRetOps;          LoadRetOps.push_back(Chain);          LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));          LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));          LoadRetOps.push_back(InFlag);          SDValue retval = DAG.getMemIntrinsicNode(              NVPTXISD::LoadParam, dl, -            DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], -            LoadRetOps.size(), EltVT, MachinePointerInfo()); +            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());          Chain = retval.getValue(1);          InFlag = retval.getValue(2);          SDValue Ret0 = retval; @@ -989,7 +1096,7 @@ SDValue 
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,          InVals.push_back(Ret0);        } else if (NumElts == 2) {          // LoadV2 -        std::vector<EVT> LoadRetVTs; +        SmallVector<EVT, 4> LoadRetVTs;          if (needTruncate) {            // If loading i1 result, generate            //   load i16 @@ -1002,15 +1109,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,          }          LoadRetVTs.push_back(MVT::Other);          LoadRetVTs.push_back(MVT::Glue); -        std::vector<SDValue> LoadRetOps; +        SmallVector<SDValue, 4> LoadRetOps;          LoadRetOps.push_back(Chain);          LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));          LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));          LoadRetOps.push_back(InFlag);          SDValue retval = DAG.getMemIntrinsicNode(              NVPTXISD::LoadParamV2, dl, -            DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], -            LoadRetOps.size(), EltVT, MachinePointerInfo()); +            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());          Chain = retval.getValue(2);          InFlag = retval.getValue(3);          SDValue Ret0 = retval.getValue(0); @@ -1054,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,            LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));            LoadRetOps.push_back(InFlag);            SDValue retval = DAG.getMemIntrinsicNode( -              Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), -              &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo()); +              Opc, dl, DAG.getVTList(LoadRetVTs), +              LoadRetOps, EltVT, MachinePointerInfo());            if (VecSize == 2) {              Chain = retval.getValue(2);              InFlag = retval.getValue(3); @@ -1110,8 +1216,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,          
LoadRetOps.push_back(InFlag);          SDValue retval = DAG.getMemIntrinsicNode(              NVPTXISD::LoadParam, dl, -            DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], -            LoadRetOps.size(), TheLoadType, MachinePointerInfo()); +            DAG.getVTList(LoadRetVTs), LoadRetOps, +            TheLoadType, MachinePointerInfo());          Chain = retval.getValue(1);          InFlag = retval.getValue(2);          SDValue Ret0 = retval.getValue(0); @@ -1153,8 +1259,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {                                  DAG.getIntPtrConstant(j)));      }    } -  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], -                     Ops.size()); +  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);  }  SDValue @@ -1209,7 +1314,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {    // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()    // in LegalizeDAG.cpp which also uses MergeValues.    SDValue Ops[] = { result, LD->getChain() }; -  return DAG.getMergeValues(Ops, 2, dl); +  return DAG.getMergeValues(Ops, dl);  }  SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1297,7 +1402,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {      MemSDNode *MemSD = cast<MemSDNode>(N);      SDValue NewSt = DAG.getMemIntrinsicNode( -        Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), +        Opcode, DL, DAG.getVTList(MVT::Other), Ops,          MemSD->getMemoryVT(), MemSD->getMemOperand());      //return DCI.CombineTo(N, NewSt, true); @@ -1429,7 +1534,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(      if (isImageOrSamplerVal(              theArgs[i],              (theArgs[i]->getParent() ? 
theArgs[i]->getParent()->getParent() -                                     : 0))) { +                                     : nullptr))) {        assert(isKernel && "Only kernels can have image/sampler params");        InVals.push_back(DAG.getConstant(i + 1, MVT::i32));        continue; @@ -1683,8 +1788,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(    //}    if (!OutChains.empty()) -    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], -                            OutChains.size())); +    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));    return Chain;  } @@ -1726,7 +1830,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,          StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);        SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, -                                      DAG.getVTList(MVT::Other), &Ops[0], 3, +                                      DAG.getVTList(MVT::Other), Ops,                                        EltVT, MachinePointerInfo());      } else if (NumElts == 2) { @@ -1742,7 +1846,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,        SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,                          StoreVal1 };        Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, -                                      DAG.getVTList(MVT::Other), &Ops[0], 4, +                                      DAG.getVTList(MVT::Other), Ops,                                        EltVT, MachinePointerInfo());      } else {        // V4 stores @@ -1814,8 +1918,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,          // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());          Chain = -            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0], -                         
           Ops.size(), EltVT, MachinePointerInfo()); +            DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, +                                    EltVT, MachinePointerInfo());          Offset += PerStoreOffset;        }      } @@ -1852,8 +1956,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,          SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, -                                        DAG.getVTList(MVT::Other), &Ops[0], -                                        3, TheStoreType, +                                        DAG.getVTList(MVT::Other), Ops, +                                        TheStoreType,                                          MachinePointerInfo());          if(TheValType.isVector())            SizeSoFar +=  @@ -1891,6 +1995,195 @@ bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {    return false;  } +static unsigned getOpcForTextureInstr(unsigned Intrinsic) { +  switch (Intrinsic) { +  default: +    return 0; + +  case Intrinsic::nvvm_tex_1d_v4f32_i32: +    return NVPTXISD::Tex1DFloatI32; +  case Intrinsic::nvvm_tex_1d_v4f32_f32: +    return NVPTXISD::Tex1DFloatFloat; +  case Intrinsic::nvvm_tex_1d_level_v4f32_f32: +    return NVPTXISD::Tex1DFloatFloatLevel; +  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: +    return NVPTXISD::Tex1DFloatFloatGrad; +  case Intrinsic::nvvm_tex_1d_v4i32_i32: +    return NVPTXISD::Tex1DI32I32; +  case Intrinsic::nvvm_tex_1d_v4i32_f32: +    return NVPTXISD::Tex1DI32Float; +  case Intrinsic::nvvm_tex_1d_level_v4i32_f32: +    return NVPTXISD::Tex1DI32FloatLevel; +  case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: +    return NVPTXISD::Tex1DI32FloatGrad; + +  case Intrinsic::nvvm_tex_1d_array_v4f32_i32: +    return NVPTXISD::Tex1DArrayFloatI32; +  case Intrinsic::nvvm_tex_1d_array_v4f32_f32: +    return NVPTXISD::Tex1DArrayFloatFloat; +  case 
Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: +    return NVPTXISD::Tex1DArrayFloatFloatLevel; +  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: +    return NVPTXISD::Tex1DArrayFloatFloatGrad; +  case Intrinsic::nvvm_tex_1d_array_v4i32_i32: +    return NVPTXISD::Tex1DArrayI32I32; +  case Intrinsic::nvvm_tex_1d_array_v4i32_f32: +    return NVPTXISD::Tex1DArrayI32Float; +  case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: +    return NVPTXISD::Tex1DArrayI32FloatLevel; +  case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: +    return NVPTXISD::Tex1DArrayI32FloatGrad; + +  case Intrinsic::nvvm_tex_2d_v4f32_i32: +    return NVPTXISD::Tex2DFloatI32; +  case Intrinsic::nvvm_tex_2d_v4f32_f32: +    return NVPTXISD::Tex2DFloatFloat; +  case Intrinsic::nvvm_tex_2d_level_v4f32_f32: +    return NVPTXISD::Tex2DFloatFloatLevel; +  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: +    return NVPTXISD::Tex2DFloatFloatGrad; +  case Intrinsic::nvvm_tex_2d_v4i32_i32: +    return NVPTXISD::Tex2DI32I32; +  case Intrinsic::nvvm_tex_2d_v4i32_f32: +    return NVPTXISD::Tex2DI32Float; +  case Intrinsic::nvvm_tex_2d_level_v4i32_f32: +    return NVPTXISD::Tex2DI32FloatLevel; +  case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: +    return NVPTXISD::Tex2DI32FloatGrad; + +  case Intrinsic::nvvm_tex_2d_array_v4f32_i32: +    return NVPTXISD::Tex2DArrayFloatI32; +  case Intrinsic::nvvm_tex_2d_array_v4f32_f32: +    return NVPTXISD::Tex2DArrayFloatFloat; +  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: +    return NVPTXISD::Tex2DArrayFloatFloatLevel; +  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: +    return NVPTXISD::Tex2DArrayFloatFloatGrad; +  case Intrinsic::nvvm_tex_2d_array_v4i32_i32: +    return NVPTXISD::Tex2DArrayI32I32; +  case Intrinsic::nvvm_tex_2d_array_v4i32_f32: +    return NVPTXISD::Tex2DArrayI32Float; +  case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: +    return NVPTXISD::Tex2DArrayI32FloatLevel; +  case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: +    return 
NVPTXISD::Tex2DArrayI32FloatGrad; + +  case Intrinsic::nvvm_tex_3d_v4f32_i32: +    return NVPTXISD::Tex3DFloatI32; +  case Intrinsic::nvvm_tex_3d_v4f32_f32: +    return NVPTXISD::Tex3DFloatFloat; +  case Intrinsic::nvvm_tex_3d_level_v4f32_f32: +    return NVPTXISD::Tex3DFloatFloatLevel; +  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: +    return NVPTXISD::Tex3DFloatFloatGrad; +  case Intrinsic::nvvm_tex_3d_v4i32_i32: +    return NVPTXISD::Tex3DI32I32; +  case Intrinsic::nvvm_tex_3d_v4i32_f32: +    return NVPTXISD::Tex3DI32Float; +  case Intrinsic::nvvm_tex_3d_level_v4i32_f32: +    return NVPTXISD::Tex3DI32FloatLevel; +  case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: +    return NVPTXISD::Tex3DI32FloatGrad; +  } +} + +static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { +  switch (Intrinsic) { +  default: +    return 0; +  case Intrinsic::nvvm_suld_1d_i8_trap: +    return NVPTXISD::Suld1DI8Trap; +  case Intrinsic::nvvm_suld_1d_i16_trap: +    return NVPTXISD::Suld1DI16Trap; +  case Intrinsic::nvvm_suld_1d_i32_trap: +    return NVPTXISD::Suld1DI32Trap; +  case Intrinsic::nvvm_suld_1d_v2i8_trap: +    return NVPTXISD::Suld1DV2I8Trap; +  case Intrinsic::nvvm_suld_1d_v2i16_trap: +    return NVPTXISD::Suld1DV2I16Trap; +  case Intrinsic::nvvm_suld_1d_v2i32_trap: +    return NVPTXISD::Suld1DV2I32Trap; +  case Intrinsic::nvvm_suld_1d_v4i8_trap: +    return NVPTXISD::Suld1DV4I8Trap; +  case Intrinsic::nvvm_suld_1d_v4i16_trap: +    return NVPTXISD::Suld1DV4I16Trap; +  case Intrinsic::nvvm_suld_1d_v4i32_trap: +    return NVPTXISD::Suld1DV4I32Trap; +  case Intrinsic::nvvm_suld_1d_array_i8_trap: +    return NVPTXISD::Suld1DArrayI8Trap; +  case Intrinsic::nvvm_suld_1d_array_i16_trap: +    return NVPTXISD::Suld1DArrayI16Trap; +  case Intrinsic::nvvm_suld_1d_array_i32_trap: +    return NVPTXISD::Suld1DArrayI32Trap; +  case Intrinsic::nvvm_suld_1d_array_v2i8_trap: +    return NVPTXISD::Suld1DArrayV2I8Trap; +  case Intrinsic::nvvm_suld_1d_array_v2i16_trap: +    return 
NVPTXISD::Suld1DArrayV2I16Trap; +  case Intrinsic::nvvm_suld_1d_array_v2i32_trap: +    return NVPTXISD::Suld1DArrayV2I32Trap; +  case Intrinsic::nvvm_suld_1d_array_v4i8_trap: +    return NVPTXISD::Suld1DArrayV4I8Trap; +  case Intrinsic::nvvm_suld_1d_array_v4i16_trap: +    return NVPTXISD::Suld1DArrayV4I16Trap; +  case Intrinsic::nvvm_suld_1d_array_v4i32_trap: +    return NVPTXISD::Suld1DArrayV4I32Trap; +  case Intrinsic::nvvm_suld_2d_i8_trap: +    return NVPTXISD::Suld2DI8Trap; +  case Intrinsic::nvvm_suld_2d_i16_trap: +    return NVPTXISD::Suld2DI16Trap; +  case Intrinsic::nvvm_suld_2d_i32_trap: +    return NVPTXISD::Suld2DI32Trap; +  case Intrinsic::nvvm_suld_2d_v2i8_trap: +    return NVPTXISD::Suld2DV2I8Trap; +  case Intrinsic::nvvm_suld_2d_v2i16_trap: +    return NVPTXISD::Suld2DV2I16Trap; +  case Intrinsic::nvvm_suld_2d_v2i32_trap: +    return NVPTXISD::Suld2DV2I32Trap; +  case Intrinsic::nvvm_suld_2d_v4i8_trap: +    return NVPTXISD::Suld2DV4I8Trap; +  case Intrinsic::nvvm_suld_2d_v4i16_trap: +    return NVPTXISD::Suld2DV4I16Trap; +  case Intrinsic::nvvm_suld_2d_v4i32_trap: +    return NVPTXISD::Suld2DV4I32Trap; +  case Intrinsic::nvvm_suld_2d_array_i8_trap: +    return NVPTXISD::Suld2DArrayI8Trap; +  case Intrinsic::nvvm_suld_2d_array_i16_trap: +    return NVPTXISD::Suld2DArrayI16Trap; +  case Intrinsic::nvvm_suld_2d_array_i32_trap: +    return NVPTXISD::Suld2DArrayI32Trap; +  case Intrinsic::nvvm_suld_2d_array_v2i8_trap: +    return NVPTXISD::Suld2DArrayV2I8Trap; +  case Intrinsic::nvvm_suld_2d_array_v2i16_trap: +    return NVPTXISD::Suld2DArrayV2I16Trap; +  case Intrinsic::nvvm_suld_2d_array_v2i32_trap: +    return NVPTXISD::Suld2DArrayV2I32Trap; +  case Intrinsic::nvvm_suld_2d_array_v4i8_trap: +    return NVPTXISD::Suld2DArrayV4I8Trap; +  case Intrinsic::nvvm_suld_2d_array_v4i16_trap: +    return NVPTXISD::Suld2DArrayV4I16Trap; +  case Intrinsic::nvvm_suld_2d_array_v4i32_trap: +    return NVPTXISD::Suld2DArrayV4I32Trap; +  case 
Intrinsic::nvvm_suld_3d_i8_trap: +    return NVPTXISD::Suld3DI8Trap; +  case Intrinsic::nvvm_suld_3d_i16_trap: +    return NVPTXISD::Suld3DI16Trap; +  case Intrinsic::nvvm_suld_3d_i32_trap: +    return NVPTXISD::Suld3DI32Trap; +  case Intrinsic::nvvm_suld_3d_v2i8_trap: +    return NVPTXISD::Suld3DV2I8Trap; +  case Intrinsic::nvvm_suld_3d_v2i16_trap: +    return NVPTXISD::Suld3DV2I16Trap; +  case Intrinsic::nvvm_suld_3d_v2i32_trap: +    return NVPTXISD::Suld3DV2I32Trap; +  case Intrinsic::nvvm_suld_3d_v4i8_trap: +    return NVPTXISD::Suld3DV4I8Trap; +  case Intrinsic::nvvm_suld_3d_v4i16_trap: +    return NVPTXISD::Suld3DV4I16Trap; +  case Intrinsic::nvvm_suld_3d_v4i32_trap: +    return NVPTXISD::Suld3DV4I32Trap; +  } +} +  // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as  // TgtMemIntrinsic  // because we need the information that is only available in the "Value" type @@ -1944,6 +2237,142 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(      Info.align = 0;      return true; +  case Intrinsic::nvvm_tex_1d_v4f32_i32: +  case Intrinsic::nvvm_tex_1d_v4f32_f32: +  case Intrinsic::nvvm_tex_1d_level_v4f32_f32: +  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: +  case Intrinsic::nvvm_tex_1d_array_v4f32_i32: +  case Intrinsic::nvvm_tex_1d_array_v4f32_f32: +  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: +  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: +  case Intrinsic::nvvm_tex_2d_v4f32_i32: +  case Intrinsic::nvvm_tex_2d_v4f32_f32: +  case Intrinsic::nvvm_tex_2d_level_v4f32_f32: +  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: +  case Intrinsic::nvvm_tex_2d_array_v4f32_i32: +  case Intrinsic::nvvm_tex_2d_array_v4f32_f32: +  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: +  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: +  case Intrinsic::nvvm_tex_3d_v4f32_i32: +  case Intrinsic::nvvm_tex_3d_v4f32_f32: +  case Intrinsic::nvvm_tex_3d_level_v4f32_f32: +  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: { +    Info.opc = 
getOpcForTextureInstr(Intrinsic); +    Info.memVT = MVT::f32; +    Info.ptrVal = nullptr; +    Info.offset = 0; +    Info.vol = 0; +    Info.readMem = true; +    Info.writeMem = false; +    Info.align = 16; +    return true; +  } +  case Intrinsic::nvvm_tex_1d_v4i32_i32: +  case Intrinsic::nvvm_tex_1d_v4i32_f32: +  case Intrinsic::nvvm_tex_1d_level_v4i32_f32: +  case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: +  case Intrinsic::nvvm_tex_1d_array_v4i32_i32: +  case Intrinsic::nvvm_tex_1d_array_v4i32_f32: +  case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: +  case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: +  case Intrinsic::nvvm_tex_2d_v4i32_i32: +  case Intrinsic::nvvm_tex_2d_v4i32_f32: +  case Intrinsic::nvvm_tex_2d_level_v4i32_f32: +  case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: +  case Intrinsic::nvvm_tex_2d_array_v4i32_i32: +  case Intrinsic::nvvm_tex_2d_array_v4i32_f32: +  case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: +  case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: +  case Intrinsic::nvvm_tex_3d_v4i32_i32: +  case Intrinsic::nvvm_tex_3d_v4i32_f32: +  case Intrinsic::nvvm_tex_3d_level_v4i32_f32: +  case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: { +    Info.opc = getOpcForTextureInstr(Intrinsic); +    Info.memVT = MVT::i32; +    Info.ptrVal = nullptr; +    Info.offset = 0; +    Info.vol = 0; +    Info.readMem = true; +    Info.writeMem = false; +    Info.align = 16; +    return true; +  } +  case Intrinsic::nvvm_suld_1d_i8_trap: +  case Intrinsic::nvvm_suld_1d_v2i8_trap: +  case Intrinsic::nvvm_suld_1d_v4i8_trap: +  case Intrinsic::nvvm_suld_1d_array_i8_trap: +  case Intrinsic::nvvm_suld_1d_array_v2i8_trap: +  case Intrinsic::nvvm_suld_1d_array_v4i8_trap: +  case Intrinsic::nvvm_suld_2d_i8_trap: +  case Intrinsic::nvvm_suld_2d_v2i8_trap: +  case Intrinsic::nvvm_suld_2d_v4i8_trap: +  case Intrinsic::nvvm_suld_2d_array_i8_trap: +  case Intrinsic::nvvm_suld_2d_array_v2i8_trap: +  case Intrinsic::nvvm_suld_2d_array_v4i8_trap: +  case 
Intrinsic::nvvm_suld_3d_i8_trap: +  case Intrinsic::nvvm_suld_3d_v2i8_trap: +  case Intrinsic::nvvm_suld_3d_v4i8_trap: { +    Info.opc = getOpcForSurfaceInstr(Intrinsic); +    Info.memVT = MVT::i8; +    Info.ptrVal = nullptr; +    Info.offset = 0; +    Info.vol = 0; +    Info.readMem = true; +    Info.writeMem = false; +    Info.align = 16; +    return true; +  } +  case Intrinsic::nvvm_suld_1d_i16_trap: +  case Intrinsic::nvvm_suld_1d_v2i16_trap: +  case Intrinsic::nvvm_suld_1d_v4i16_trap: +  case Intrinsic::nvvm_suld_1d_array_i16_trap: +  case Intrinsic::nvvm_suld_1d_array_v2i16_trap: +  case Intrinsic::nvvm_suld_1d_array_v4i16_trap: +  case Intrinsic::nvvm_suld_2d_i16_trap: +  case Intrinsic::nvvm_suld_2d_v2i16_trap: +  case Intrinsic::nvvm_suld_2d_v4i16_trap: +  case Intrinsic::nvvm_suld_2d_array_i16_trap: +  case Intrinsic::nvvm_suld_2d_array_v2i16_trap: +  case Intrinsic::nvvm_suld_2d_array_v4i16_trap: +  case Intrinsic::nvvm_suld_3d_i16_trap: +  case Intrinsic::nvvm_suld_3d_v2i16_trap: +  case Intrinsic::nvvm_suld_3d_v4i16_trap: { +    Info.opc = getOpcForSurfaceInstr(Intrinsic); +    Info.memVT = MVT::i16; +    Info.ptrVal = nullptr; +    Info.offset = 0; +    Info.vol = 0; +    Info.readMem = true; +    Info.writeMem = false; +    Info.align = 16; +    return true; +  } +  case Intrinsic::nvvm_suld_1d_i32_trap: +  case Intrinsic::nvvm_suld_1d_v2i32_trap: +  case Intrinsic::nvvm_suld_1d_v4i32_trap: +  case Intrinsic::nvvm_suld_1d_array_i32_trap: +  case Intrinsic::nvvm_suld_1d_array_v2i32_trap: +  case Intrinsic::nvvm_suld_1d_array_v4i32_trap: +  case Intrinsic::nvvm_suld_2d_i32_trap: +  case Intrinsic::nvvm_suld_2d_v2i32_trap: +  case Intrinsic::nvvm_suld_2d_v4i32_trap: +  case Intrinsic::nvvm_suld_2d_array_i32_trap: +  case Intrinsic::nvvm_suld_2d_array_v2i32_trap: +  case Intrinsic::nvvm_suld_2d_array_v4i32_trap: +  case Intrinsic::nvvm_suld_3d_i32_trap: +  case Intrinsic::nvvm_suld_3d_v2i32_trap: +  case Intrinsic::nvvm_suld_3d_v4i32_trap: { +    
Info.opc = getOpcForSurfaceInstr(Intrinsic); +    Info.memVT = MVT::i32; +    Info.ptrVal = nullptr; +    Info.offset = 0; +    Info.vol = 0; +    Info.readMem = true; +    Info.writeMem = false; +    Info.align = 16; +    return true; +  } +    }    return false;  } @@ -2094,7 +2523,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,    case 4: {      Opcode = NVPTXISD::LoadV4;      EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; -    LdResVTs = DAG.getVTList(ListVTs, 5); +    LdResVTs = DAG.getVTList(ListVTs);      break;    }    } @@ -2111,8 +2540,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,    // pass along the extension information    OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); -  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], -                                          OtherOps.size(), LD->getMemoryVT(), +  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, +                                          LD->getMemoryVT(),                                            LD->getMemOperand());    SmallVector<SDValue, 4> ScalarRes; @@ -2126,8 +2555,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,    SDValue LoadChain = NewLD.getValue(NumElts); -  SDValue BuildVec = -      DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); +  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);    Results.push_back(BuildVec);    Results.push_back(LoadChain); @@ -2207,7 +2635,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,            break;          }          EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; -        LdResVTs = DAG.getVTList(ListVTs, 5); +        LdResVTs = DAG.getVTList(ListVTs);          break;        }        } @@ -2224,9 +2652,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,        MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); -      
SDValue NewLD = DAG.getMemIntrinsicNode( -          Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), -          MemSD->getMemoryVT(), MemSD->getMemOperand()); +      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, +                                              MemSD->getMemoryVT(), +                                              MemSD->getMemOperand());        SmallVector<SDValue, 4> ScalarRes; @@ -2241,7 +2669,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,        SDValue LoadChain = NewLD.getValue(NumElts);        SDValue BuildVec = -          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); +          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);        Results.push_back(BuildVec);        Results.push_back(LoadChain); @@ -2263,8 +2691,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,        // We make sure the memory type is i8, which will be used during isel        // to select the proper instruction.        
SDValue NewLD = -          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], -                                  Ops.size(), MVT::i8, MemSD->getMemOperand()); +          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, +                                  MVT::i8, MemSD->getMemOperand());        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,                                      NewLD.getValue(0))); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index c1e8c21..7bad8a2 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -70,7 +70,100 @@ enum NodeType {    StoreParamU32, // to zext and store a <32bit value, not used currently     StoreRetval,    StoreRetvalV2, -  StoreRetvalV4 +  StoreRetvalV4, + +  // Texture intrinsics +  Tex1DFloatI32, +  Tex1DFloatFloat, +  Tex1DFloatFloatLevel, +  Tex1DFloatFloatGrad, +  Tex1DI32I32, +  Tex1DI32Float, +  Tex1DI32FloatLevel, +  Tex1DI32FloatGrad, +  Tex1DArrayFloatI32, +  Tex1DArrayFloatFloat, +  Tex1DArrayFloatFloatLevel, +  Tex1DArrayFloatFloatGrad, +  Tex1DArrayI32I32, +  Tex1DArrayI32Float, +  Tex1DArrayI32FloatLevel, +  Tex1DArrayI32FloatGrad, +  Tex2DFloatI32, +  Tex2DFloatFloat, +  Tex2DFloatFloatLevel, +  Tex2DFloatFloatGrad, +  Tex2DI32I32, +  Tex2DI32Float, +  Tex2DI32FloatLevel, +  Tex2DI32FloatGrad, +  Tex2DArrayFloatI32, +  Tex2DArrayFloatFloat, +  Tex2DArrayFloatFloatLevel, +  Tex2DArrayFloatFloatGrad, +  Tex2DArrayI32I32, +  Tex2DArrayI32Float, +  Tex2DArrayI32FloatLevel, +  Tex2DArrayI32FloatGrad, +  Tex3DFloatI32, +  Tex3DFloatFloat, +  Tex3DFloatFloatLevel, +  Tex3DFloatFloatGrad, +  Tex3DI32I32, +  Tex3DI32Float, +  Tex3DI32FloatLevel, +  Tex3DI32FloatGrad, + +  // Surface intrinsics +  Suld1DI8Trap, +  Suld1DI16Trap, +  Suld1DI32Trap, +  Suld1DV2I8Trap, +  Suld1DV2I16Trap, +  Suld1DV2I32Trap, +  Suld1DV4I8Trap, +  Suld1DV4I16Trap, +  Suld1DV4I32Trap, + +  Suld1DArrayI8Trap, +  
Suld1DArrayI16Trap, +  Suld1DArrayI32Trap, +  Suld1DArrayV2I8Trap, +  Suld1DArrayV2I16Trap, +  Suld1DArrayV2I32Trap, +  Suld1DArrayV4I8Trap, +  Suld1DArrayV4I16Trap, +  Suld1DArrayV4I32Trap, + +  Suld2DI8Trap, +  Suld2DI16Trap, +  Suld2DI32Trap, +  Suld2DV2I8Trap, +  Suld2DV2I16Trap, +  Suld2DV2I32Trap, +  Suld2DV4I8Trap, +  Suld2DV4I16Trap, +  Suld2DV4I32Trap, + +  Suld2DArrayI8Trap, +  Suld2DArrayI16Trap, +  Suld2DArrayI32Trap, +  Suld2DArrayV2I8Trap, +  Suld2DArrayV2I16Trap, +  Suld2DArrayV2I32Trap, +  Suld2DArrayV4I8Trap, +  Suld2DArrayV4I16Trap, +  Suld2DArrayV4I32Trap, + +  Suld3DI8Trap, +  Suld3DI16Trap, +  Suld3DI32Trap, +  Suld3DV2I8Trap, +  Suld3DV2I16Trap, +  Suld3DV2I32Trap, +  Suld3DV4I8Trap, +  Suld3DV4I16Trap, +  Suld3DV4I32Trap  };  } @@ -80,68 +173,70 @@ enum NodeType {  class NVPTXTargetLowering : public TargetLowering {  public:    explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); -  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; +  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;    SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,                               SelectionDAG &DAG) const; -  virtual const char *getTargetNodeName(unsigned Opcode) const; +  const char *getTargetNodeName(unsigned Opcode) const override;    bool isTypeSupportedInIntrinsic(MVT VT) const;    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, -                          unsigned Intrinsic) const; +                          unsigned Intrinsic) const override;    /// isLegalAddressingMode - Return true if the addressing mode represented    /// by AM is legal for this target, for a load/store of the specified type    /// Used to guide target specific optimizations, like loop strength    /// reduction (LoopStrengthReduce.cpp) and memory optimization for    /// address mode (CodeGenPrepare.cpp) -  virtual bool 
isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; +  bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;    /// getFunctionAlignment - Return the Log2 alignment of this function. -  virtual unsigned getFunctionAlignment(const Function *F) const; +  unsigned getFunctionAlignment(const Function *F) const; -  virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const { +  EVT getSetCCResultType(LLVMContext &, EVT VT) const override {      if (VT.isVector())        return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());      return MVT::i1;    } -  ConstraintType getConstraintType(const std::string &Constraint) const; +  ConstraintType +  getConstraintType(const std::string &Constraint) const override;    std::pair<unsigned, const TargetRegisterClass *> -  getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; +  getRegForInlineAsmConstraint(const std::string &Constraint, +                               MVT VT) const override; -  virtual SDValue LowerFormalArguments( +  SDValue LowerFormalArguments(        SDValue Chain, CallingConv::ID CallConv, bool isVarArg,        const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, -      SmallVectorImpl<SDValue> &InVals) const; +      SmallVectorImpl<SDValue> &InVals) const override; -  virtual SDValue -  LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; +  SDValue LowerCall(CallLoweringInfo &CLI, +                    SmallVectorImpl<SDValue> &InVals) const override;    std::string getPrototype(Type *, const ArgListTy &,                             const SmallVectorImpl<ISD::OutputArg> &,                             unsigned retAlignment,                             const ImmutableCallSite *CS) const; -  virtual SDValue +  SDValue    LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,                const SmallVectorImpl<ISD::OutputArg> &Outs,                const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, -        
      SelectionDAG &DAG) const; +              SelectionDAG &DAG) const override; -  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, -                                            std::vector<SDValue> &Ops, -                                            SelectionDAG &DAG) const; +  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, +                                    std::vector<SDValue> &Ops, +                                    SelectionDAG &DAG) const override;    NVPTXTargetMachine *nvTM;    // PTX always uses 32-bit shift amounts -  virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } +  MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } -  virtual bool shouldSplitVectorType(EVT VT) const override; +  bool shouldSplitVectorType(EVT VT) const override;  private:    const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here @@ -160,8 +255,8 @@ private:    SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;    SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; -  virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, -                                  SelectionDAG &DAG) const; +  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, +                          SelectionDAG &DAG) const override;    unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,                                  Type *Ty, unsigned Idx) const; diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp new file mode 100644 index 0000000..397f4bc --- /dev/null +++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -0,0 +1,178 @@ +//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source  +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass implements IR-level optimizations of image access code, +// including: +// +// 1. Eliminate istypep intrinsics when image access qualifier is known +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" + +using namespace llvm; + +namespace { +class NVPTXImageOptimizer : public FunctionPass { +private: +  static char ID; +  SmallVector<Instruction*, 4> InstrToDelete; + +public: +  NVPTXImageOptimizer(); + +  bool runOnFunction(Function &F) override; + +private: +  bool replaceIsTypePSampler(Instruction &I); +  bool replaceIsTypePSurface(Instruction &I); +  bool replaceIsTypePTexture(Instruction &I); +  Value *cleanupValue(Value *V); +  void replaceWith(Instruction *From, ConstantInt *To); +}; +} + +char NVPTXImageOptimizer::ID = 0; + +NVPTXImageOptimizer::NVPTXImageOptimizer() +  : FunctionPass(ID) {} + +bool NVPTXImageOptimizer::runOnFunction(Function &F) { +  bool Changed = false; +  InstrToDelete.clear(); + +  // Look for call instructions in the function +  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; +       ++BI) { +    for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); +         I != E; ++I) { +      Instruction &Instr = *I; +      if (CallInst *CI = dyn_cast<CallInst>(I)) { +        Function *CalledF = CI->getCalledFunction(); +        if (CalledF && CalledF->isIntrinsic()) { +          // This is an intrinsic function call, check if its an istypep +          switch (CalledF->getIntrinsicID()) { +          default: break; +          case Intrinsic::nvvm_istypep_sampler: +            Changed |= replaceIsTypePSampler(Instr); +            break; +          case 
Intrinsic::nvvm_istypep_surface: +            Changed |= replaceIsTypePSurface(Instr); +            break; +          case Intrinsic::nvvm_istypep_texture: +            Changed |= replaceIsTypePTexture(Instr); +            break; +          } +        } +      } +    } +  } + +  // Delete any istypep instances we replaced in the IR +  for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i) +    InstrToDelete[i]->eraseFromParent(); + +  return Changed; +} + +bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) { +  Value *TexHandle = cleanupValue(I.getOperand(0)); +  if (isSampler(*TexHandle)) { +    // This is an OpenCL sampler, so it must be a samplerref +    replaceWith(&I, ConstantInt::getTrue(I.getContext())); +    return true; +  } else if (isImageWriteOnly(*TexHandle) || +             isImageReadWrite(*TexHandle) || +             isImageReadOnly(*TexHandle)) { +    // This is an OpenCL image, so it cannot be a samplerref +    replaceWith(&I, ConstantInt::getFalse(I.getContext())); +    return true; +  } else { +    // The image type is unknown, so we cannot eliminate the intrinsic +    return false; +  } +} + +bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) { +  Value *TexHandle = cleanupValue(I.getOperand(0)); +  if (isImageReadWrite(*TexHandle) || +      isImageWriteOnly(*TexHandle)) { +    // This is an OpenCL write-only/read-write image, so it must be a surfref +    replaceWith(&I, ConstantInt::getTrue(I.getContext())); +    return true; +  } else if (isImageReadOnly(*TexHandle) || +             isSampler(*TexHandle)) { +    // This is an OpenCL read-only image or sampler, so it cannot be +    // a surfref +    replaceWith(&I, ConstantInt::getFalse(I.getContext())); +    return true; +  } else { +    // The image type is unknown, so we cannot eliminate the intrinsic +    return false; +  } +} + +bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) { +  Value *TexHandle = cleanupValue(I.getOperand(0)); +  if 
(isImageReadOnly(*TexHandle)) { +    // This is an OpenCL read-only image, so it must be a texref +    replaceWith(&I, ConstantInt::getTrue(I.getContext())); +    return true; +  } else if (isImageWriteOnly(*TexHandle) || +             isImageReadWrite(*TexHandle) || +             isSampler(*TexHandle)) { +    // This is an OpenCL read-write/write-only image or a sampler, so it +    // cannot be a texref +    replaceWith(&I, ConstantInt::getFalse(I.getContext())); +    return true; +  } else { +    // The image type is unknown, so we cannot eliminate the intrinsic +    return false; +  } +} + +void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) { +  // We implement "poor man's DCE" here to make sure any code that is no longer +  // live is actually unreachable and can be trivially eliminated by the +  // unreachable block elimination pass. +  for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end(); +       UI != UE; ++UI) { +    if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) { +      if (BI->isUnconditional()) continue; +      BasicBlock *Dest; +      if (To->isZero()) +        // Get false block +        Dest = BI->getSuccessor(1); +      else +        // Get true block +        Dest = BI->getSuccessor(0); +      BranchInst::Create(Dest, BI); +      InstrToDelete.push_back(BI); +    } +  } +  From->replaceAllUsesWith(To); +  InstrToDelete.push_back(From); +} + +Value *NVPTXImageOptimizer::cleanupValue(Value *V) { +  if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { +    return cleanupValue(EVI->getAggregateOperand()); +  } +  return V; +} + +FunctionPass *llvm::createNVPTXImageOptimizerPass() { +  return new NVPTXImageOptimizer(); +} diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 86ddd38..cdc8088 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -14,8 +14,6 @@  #include "NVPTX.h"  #include "NVPTXInstrInfo.h"  #include 
"NVPTXTargetMachine.h" -#define GET_INSTRINFO_CTOR_DTOR -#include "NVPTXGenInstrInfo.inc"  #include "llvm/IR/Function.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/CodeGen/MachineFunction.h" @@ -24,6 +22,9 @@  using namespace llvm; +#define GET_INSTRINFO_CTOR_DTOR +#include "NVPTXGenInstrInfo.inc" +  // Pin the vtable to this file.  void NVPTXInstrInfo::anchor() {} @@ -256,7 +257,7 @@ unsigned NVPTXInstrInfo::InsertBranch(           "NVPTX branch conditions have two components!");    // One-way branch. -  if (FBB == 0) { +  if (!FBB) {      if (Cond.empty()) // Unconditional branch        BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);      else // Conditional branch diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 600fc5c..88a9e45 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -30,7 +30,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {  public:    explicit NVPTXInstrInfo(NVPTXTargetMachine &TM); -  virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; } +  const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }    /* The following virtual functions are used in register allocation.     * They are not implemented because the existing interface and the logic @@ -50,9 +50,9 @@ public:     *                               const TargetRegisterClass *RC) const;     */ -  virtual void copyPhysReg( +  void copyPhysReg(        MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, -      unsigned DestReg, unsigned SrcReg, bool KillSrc) const; +      unsigned DestReg, unsigned SrcReg, bool KillSrc) const override;    virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,                             unsigned &DestReg) const;    bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; @@ -61,13 +61,13 @@ public:    virtual bool CanTailMerge(const MachineInstr *MI) const;    // Branch analysis. 
-  virtual bool AnalyzeBranch( +  bool AnalyzeBranch(        MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, -      SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; -  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; -  virtual unsigned InsertBranch( +      SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override; +  unsigned RemoveBranch(MachineBasicBlock &MBB) const override; +  unsigned InsertBranch(        MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, -      const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; +      const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const override;    unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {      return MI.getOperand(2).getImm();    } diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 14049b1..5e228fc 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1666,6 +1666,9 @@ def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen                  (MoveParam texternalsym:$src)))),                 (nvvm_move_ptr32  texternalsym:$src)>; +def texsurf_handles +  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), +              "mov.u64 \t$result, $src;", []>;  //-----------------------------------  // Compiler Error Warn @@ -1686,6 +1689,1826 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),                  [(int_nvvm_compiler_error Int64Regs:$a)]>; +//----------------------------------- +// Texture Intrinsics +//----------------------------------- + +// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be +// also defined in NVPTXReplaceImageHandles.cpp + + +// Texture fetch instructions using handles +def TEX_1D_F32_I32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins 
Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), +              "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", +              []>; +def TEX_1D_F32_F32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), +              "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", +              []>; +def TEX_1D_F32_F32_LEVEL +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), +              "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x\\}], $lod;", +              []>; +def TEX_1D_F32_F32_GRAD +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, +                   Float32Regs:$gradx, Float32Regs:$grady), +              "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", +              []>; +def TEX_1D_I32_I32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), +              "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", +              []>; +def TEX_1D_I32_F32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), +              "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", +              []>; +def TEX_1D_I32_F32_LEVEL +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, +                   Float32Regs:$lod), +              
"tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x\\}], $lod;", +              []>; +def TEX_1D_I32_F32_GRAD +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, +                   Float32Regs:$gradx, Float32Regs:$grady), +              "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", +              []>; + +def TEX_1D_ARRAY_F32_I32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}];", +              []>; +def TEX_1D_ARRAY_F32_F32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), +              "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}];", +              []>; +def TEX_1D_ARRAY_F32_F32_LEVEL +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$lod), +              "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}], $lod;", +              []>; +def TEX_1D_ARRAY_F32_F32_GRAD +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$gradx, Float32Regs:$grady), +              "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", +              
[]>; +def TEX_1D_ARRAY_I32_I32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}];", +              []>; +def TEX_1D_ARRAY_I32_F32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), +              "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}];", +              []>; +def TEX_1D_ARRAY_I32_F32_LEVEL +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$lod), +              "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}], $lod;", +              []>; +def TEX_1D_ARRAY_I32_F32_GRAD +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$gradx, Float32Regs:$grady), +              "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", +              []>; + +def TEX_2D_F32_I32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}];", +              []>; +def TEX_2D_F32_F32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), +   
           "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}];", +              []>; +def TEX_2D_F32_F32_LEVEL +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$lod), +              "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}], $lod;", +              []>; +def TEX_2D_F32_F32_GRAD +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$gradx0, Float32Regs:$gradx1, +                   Float32Regs:$grady0, Float32Regs:$grady1), +              "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " +              "\\{$grady0, $grady1\\};", +              []>; +def TEX_2D_I32_I32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}];", +              []>; +def TEX_2D_I32_F32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), +              "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}];", +              []>; +def TEX_2D_I32_F32_LEVEL +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$lod), +              "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, 
$s, \\{$x, $y\\}], $lod;", +              []>; +def TEX_2D_I32_F32_GRAD +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$gradx0, Float32Regs:$gradx1, +                   Float32Regs:$grady0, Float32Regs:$grady1), +              "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " +              "\\{$grady0, $grady1\\};", +              []>; + +def TEX_2D_ARRAY_F32_I32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +                   Int32Regs:$y), +              "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}];", +              []>; +def TEX_2D_ARRAY_F32_F32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$y), +              "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}];", +              []>; +def TEX_2D_ARRAY_F32_F32_LEVEL +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$y, Float32Regs:$lod), +              "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", +              []>; +def TEX_2D_ARRAY_F32_F32_GRAD +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$y, 
Float32Regs:$gradx0, Float32Regs:$gradx1, +                   Float32Regs:$grady0, Float32Regs:$grady1), +              "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " +              "\\{$grady0, $grady1\\};", +              []>; +def TEX_2D_ARRAY_I32_I32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +                   Int32Regs:$y), +              "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}];", +              []>; +def TEX_2D_ARRAY_I32_F32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$y), +              "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}];", +              []>; +def TEX_2D_ARRAY_I32_F32_LEVEL +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$y, Float32Regs:$lod), +              "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", +              []>; +def TEX_2D_ARRAY_I32_F32_GRAD +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, +                   Float32Regs:$y, +                   Float32Regs:$gradx0, Float32Regs:$gradx1, +                   Float32Regs:$grady0, Float32Regs:$grady1), +              "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " +              "\\{$grady0, $grady1\\};", + 
             []>; + +def TEX_3D_F32_I32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$z), +              "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}];", +              []>; +def TEX_3D_F32_F32 +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$z), +              "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}];", +              []>; +def TEX_3D_F32_F32_LEVEL +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$z, Float32Regs:$lod), +              "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", +              []>; +def TEX_3D_F32_F32_GRAD +  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, +                    Float32Regs:$b, Float32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$z, +                   Float32Regs:$gradx0, Float32Regs:$gradx1, +                   Float32Regs:$gradx2, Float32Regs:$grady0, +                   Float32Regs:$grady1, Float32Regs:$grady2), +              "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}], " +              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " +              "\\{$grady0, $grady1, $grady2, $grady2\\};", +              []>; +def TEX_3D_I32_I32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins 
Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$z), +              "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}];", +              []>; +def TEX_3D_I32_F32 +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$z), +              "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}];", +              []>; +def TEX_3D_I32_F32_LEVEL +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$z, Float32Regs:$lod), +              "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", +              []>; +def TEX_3D_I32_F32_GRAD +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, +                    Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, +                   Float32Regs:$z, +                   Float32Regs:$gradx0, Float32Regs:$gradx1, +                   Float32Regs:$gradx2, Float32Regs:$grady0, +                   Float32Regs:$grady1, Float32Regs:$grady2), +              "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " +              "[$t, $s, \\{$x, $y, $z, $z\\}], " +              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " +              "\\{$grady0, $grady1, $grady2, $grady2\\};", +              []>; + + +// Surface load instructions +def SULD_1D_I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x), +   
           "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_V2I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_V2I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_V2I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_V4I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_V4I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", +              []>; +def SULD_1D_V4I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x), +              "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", +              []>; + +def SULD_1D_ARRAY_I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +      
        "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_V2I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_V2I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_V2I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_V4I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_V4I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$l, $x\\}];", +              []>; +def SULD_1D_ARRAY_V4I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), +              "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$l, $x\\}];", +              []>; + +def SULD_2D_I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y), +              "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_V2I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_V2I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_V2I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_V4I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_V4I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", +              []>; +def SULD_2D_V4I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), +              "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", +            
  []>; + +def SULD_2D_ARRAY_I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_V2I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " +              "[$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_V2I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " +              "[$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_V2I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " +              "[$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_V4I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_V4I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$l, $x, $y, $y\\}];", +              []>; +def SULD_2D_ARRAY_V4I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), +              "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$l, $x, $y, $y\\}];", +              []>; + +def SULD_3D_I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_V2I8_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_V2I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_V2I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_V4I8_TRAP +  : NVPTXInst<(outs 
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_V4I16_TRAP +  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$x, $y, $z, $z\\}];", +              []>; +def SULD_3D_V4I32_TRAP +  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), +              "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " +              "[$s, \\{$x, $y, $z, $z\\}];", +              []>; + + +//----------------------------------- +// Texture Query Intrinsics +//----------------------------------- +def TXQ_CHANNEL_ORDER +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.channel_order.b32 \t$d, [$a];", +              []>; +def TXQ_CHANNEL_DATA_TYPE +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.channel_data_type.b32 \t$d, [$a];", +              []>; +def TXQ_WIDTH +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.width.b32 \t$d, [$a];", +              []>; +def TXQ_HEIGHT +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.height.b32 \t$d, [$a];", +              []>; +def TXQ_DEPTH +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.depth.b32 \t$d, [$a];", +              []>; +def TXQ_ARRAY_SIZE +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.array_size.b32 \t$d, [$a];", +              []>; +def TXQ_NUM_SAMPLES +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.num_samples.b32 \t$d, [$a];", +              []>; +def 
TXQ_NUM_MIPMAP_LEVELS +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "txq.num_mipmap_levels.b32 \t$d, [$a];", +              []>; + +def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), +          (TXQ_CHANNEL_ORDER Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), +          (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_width Int64Regs:$a), +          (TXQ_WIDTH Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_height Int64Regs:$a), +          (TXQ_HEIGHT Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_depth Int64Regs:$a), +          (TXQ_DEPTH Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), +          (TXQ_ARRAY_SIZE Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), +          (TXQ_NUM_SAMPLES Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), +          (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; + + +//----------------------------------- +// Surface Query Intrinsics +//----------------------------------- +def SUQ_CHANNEL_ORDER +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "suq.channel_order.b32 \t$d, [$a];", +              []>; +def SUQ_CHANNEL_DATA_TYPE +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "suq.channel_data_type.b32 \t$d, [$a];", +              []>; +def SUQ_WIDTH +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "suq.width.b32 \t$d, [$a];", +              []>; +def SUQ_HEIGHT +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "suq.height.b32 \t$d, [$a];", +              []>; +def SUQ_DEPTH +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "suq.depth.b32 \t$d, [$a];", +              []>; +def SUQ_ARRAY_SIZE +  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), +              "suq.array_size.b32 \t$d, [$a];", +              []>; + +def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), +          (SUQ_CHANNEL_ORDER Int64Regs:$a)>; +def : 
Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), +          (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_width Int64Regs:$a), +          (SUQ_WIDTH Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_height Int64Regs:$a), +          (SUQ_HEIGHT Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_depth Int64Regs:$a), +          (SUQ_DEPTH Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), +          (SUQ_ARRAY_SIZE Int64Regs:$a)>; + + +//===- Handle Query -------------------------------------------------------===// + +// TODO: These intrinsics are not yet finalized, pending PTX ISA design work +def ISTYPEP_SAMPLER +  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), +              "istypep.samplerref \t$d, $a;", +              [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; +def ISTYPEP_SURFACE +  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), +              "istypep.surfref \t$d, $a;", +              [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; +def ISTYPEP_TEXTURE +  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), +              "istypep.texref \t$d, $a;", +              [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; + +//===- Surface Stores -----------------------------------------------------===// + +// Unformatted + +def SUST_B_1D_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +              "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", +              []>; +def SUST_B_1D_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +              "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", +              []>; +def SUST_B_1D_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), +              "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", +              []>; +def SUST_B_1D_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, 
Int16Regs:$r, Int16Regs:$g), +              "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", +              []>; +def SUST_B_1D_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +              "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", +              []>; +def SUST_B_1D_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), +              "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", +              []>; +def SUST_B_1D_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, +                   Int16Regs:$b, Int16Regs:$a), +              "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_1D_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, +                   Int16Regs:$b, Int16Regs:$a), +              "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_1D_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, +                   Int32Regs:$b, Int32Regs:$a), +              "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_B_1D_ARRAY_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), +              "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", +              []>; +def SUST_B_1D_ARRAY_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), +              "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", +              []>; +def SUST_B_1D_ARRAY_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), +              "sust.b.a1d.b32.trap 
\t[$s, \\{$idx, $x\\}], \\{$r\\};", +              []>; +def SUST_B_1D_ARRAY_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", +              []>; +def SUST_B_1D_ARRAY_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", +              []>; +def SUST_B_1D_ARRAY_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, +                   Int32Regs:$g), +              "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", +              []>; +def SUST_B_1D_ARRAY_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " +              "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_1D_ARRAY_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +             "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_1D_ARRAY_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, +                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +             "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_B_2D_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +              "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], 
\\{$r\\};", +              []>; +def SUST_B_2D_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +              "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", +              []>; +def SUST_B_2D_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), +              "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", +              []>; +def SUST_B_2D_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", +              []>; +def SUST_B_2D_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", +              []>; +def SUST_B_2D_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, +                   Int32Regs:$g), +              "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", +              []>; +def SUST_B_2D_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " +              "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_2D_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +             "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_2D_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, +                   Int32Regs:$g, 
Int32Regs:$b, Int32Regs:$a), +             "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_B_2D_ARRAY_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r), +              "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", +              []>; +def SUST_B_2D_ARRAY_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r), +              "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", +              []>; +def SUST_B_2D_ARRAY_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$r), +              "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", +              []>; +def SUST_B_2D_ARRAY_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g), +              "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_B_2D_ARRAY_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g), +             "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +             "\\{$r, $g\\};", +              []>; +def SUST_B_2D_ARRAY_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$r, Int32Regs:$g), +             "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +             "\\{$r, $g\\};", +              []>; +def SUST_B_2D_ARRAY_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +   
                Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +      "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +      "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_2D_ARRAY_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +     "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +     "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_2D_ARRAY_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +     "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +     "\\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_B_3D_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r), +              "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", +              []>; +def SUST_B_3D_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r), +              "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", +              []>; +def SUST_B_3D_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int32Regs:$r), +              "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", +              []>; +def SUST_B_3D_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g), +              "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_B_3D_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g), +              "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_B_3D_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int32Regs:$r, Int32Regs:$g), +              "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_B_3D_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +         "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " +         "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_3D_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +        "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " +        "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_B_3D_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +        "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " +        "\\{$r, $g, $b, $a\\};", +              []>; + +// Formatted + +def SUST_P_1D_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +              "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", +              []>; +def SUST_P_1D_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +              "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", +              []>; +def SUST_P_1D_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), +              
"sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", +              []>; +def SUST_P_1D_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +              "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", +              []>; +def SUST_P_1D_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +              "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", +              []>; +def SUST_P_1D_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), +              "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", +              []>; +def SUST_P_1D_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, +                   Int16Regs:$b, Int16Regs:$a), +              "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_1D_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, +                   Int16Regs:$b, Int16Regs:$a), +              "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_1D_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, +                   Int32Regs:$b, Int32Regs:$a), +              "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_P_1D_ARRAY_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), +              "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", +              []>; +def SUST_P_1D_ARRAY_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), +              "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", +              []>; +def 
SUST_P_1D_ARRAY_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), +              "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", +              []>; +def SUST_P_1D_ARRAY_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", +              []>; +def SUST_P_1D_ARRAY_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", +              []>; +def SUST_P_1D_ARRAY_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, +                   Int32Regs:$g), +              "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", +              []>; +def SUST_P_1D_ARRAY_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " +              "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_1D_ARRAY_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +             "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_1D_ARRAY_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, +                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +             "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; + + +def 
SUST_P_2D_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +              "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", +              []>; +def SUST_P_2D_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +              "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", +              []>; +def SUST_P_2D_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), +              "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", +              []>; +def SUST_P_2D_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", +              []>; +def SUST_P_2D_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g), +              "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", +              []>; +def SUST_P_2D_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, +                   Int32Regs:$g), +              "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", +              []>; +def SUST_P_2D_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +              "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " +              "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_2D_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, +                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +             "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " +             "\\{$r, $g, $b, $a\\};", +     
         []>; +def SUST_P_2D_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, +                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +             "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " +             "\\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_P_2D_ARRAY_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r), +              "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", +              []>; +def SUST_P_2D_ARRAY_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r), +              "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", +              []>; +def SUST_P_2D_ARRAY_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$r), +              "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", +              []>; +def SUST_P_2D_ARRAY_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g), +              "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_P_2D_ARRAY_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g), +             "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +             "\\{$r, $g\\};", +              []>; +def SUST_P_2D_ARRAY_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$r, Int32Regs:$g), +             "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +             
"\\{$r, $g\\};", +              []>; +def SUST_P_2D_ARRAY_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +      "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +      "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_2D_ARRAY_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +     "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +     "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_2D_ARRAY_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, +                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +     "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " +     "\\{$r, $g, $b, $a\\};", +              []>; + + +def SUST_P_3D_B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r), +              "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", +              []>; +def SUST_P_3D_B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r), +              "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", +              []>; +def SUST_P_3D_B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int32Regs:$r), +              "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", +              []>; +def SUST_P_3D_V2B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g), +              "sust.p.3d.v2.b8.trap \t[$s, 
\\{$x, $y, $z, $z\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_P_3D_V2B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g), +              "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_P_3D_V2B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int32Regs:$r, Int32Regs:$g), +              "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " +              "\\{$r, $g\\};", +              []>; +def SUST_P_3D_V4B8_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +         "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " +         "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_3D_V4B16_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +        "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " +        "\\{$r, $g, $b, $a\\};", +              []>; +def SUST_P_3D_V4B32_TRAP +  : NVPTXInst<(outs), +              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +        "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " +        "\\{$r, $g, $b, $a\\};", +              []>; + + +// Surface store instruction patterns +// I'm not sure why we can't just include these in the instruction definitions, +// but TableGen complains of type errors :( + +def : Pat<(int_nvvm_sust_b_1d_i8_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +          (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : 
Pat<(int_nvvm_sust_b_1d_i16_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +          (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), +          (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), +          (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_trap +           Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_trap +           Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_trap +           Int64Regs:$s, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), +          (SUST_B_1D_ARRAY_B8_TRAP 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), +          (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), +          (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), +          (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap +           
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), +          (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_trap +          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), +          (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_trap +          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), +          (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_trap +          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), +          (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_trap +           
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i16_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), +          (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : 
Pat<(int_nvvm_sust_b_2d_array_v2i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, +           Int32Regs:$g), +          (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r), +          (SUST_B_3D_B8_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r), +          (SUST_B_3D_B16_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i32_trap +           Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r), +          (SUST_B_3D_B32_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_B_3D_V2B8_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_B_3D_V2B16_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g), +          (SUST_B_3D_V2B32_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_3D_V4B8_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_B_3D_V4B16_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_B_3D_V4B32_TRAP Int64Regs:$s, + 
          Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + + +def : Pat<(int_nvvm_sust_p_1d_i8_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +          (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_i16_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), +          (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), +          (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i8_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i16_trap +           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), +          (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i8_trap +           Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i16_trap +           Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i32_trap +           Int64Regs:$s, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, 
Int32Regs:$a), +          (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_1d_array_i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), +          (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), +          (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), +          (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), +          (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), +          (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap +           Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_2d_i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), +          (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i8_trap +          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), +          (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i16_trap +          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), +          (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i32_trap +          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), +          (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i8_trap +           Int64Regs:$s, 
Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_2d_array_i8_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_i16_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), +          (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_i32_trap +          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), +          (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : 
Pat<(int_nvvm_sust_p_2d_array_v2i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, +           Int32Regs:$g), +          (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, +           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap +           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, +           Int32Regs:$x, Int32Regs:$y, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_3d_i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r), +          (SUST_P_3D_B8_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_i16_trap +      
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r), +          (SUST_P_3D_B16_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r), +          (SUST_P_3D_B32_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_P_3D_V2B8_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g), +          (SUST_P_3D_V2B16_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g), +          (SUST_P_3D_V2B32_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i8_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_3D_V4B8_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i16_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), +          (SUST_P_3D_V4B16_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +  
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i32_trap +           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), +          (SUST_P_3D_V4B32_TRAP Int64Regs:$s, +           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, +           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + +  //===-- Old PTX Back-end Intrinsics ---------------------------------------===// diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index c9aa87d..5ec1fc9 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -27,17 +27,17 @@ struct NVPTXLowerAggrCopies : public FunctionPass {    NVPTXLowerAggrCopies() : FunctionPass(ID) {} -  void getAnalysisUsage(AnalysisUsage &AU) const { +  void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<DataLayoutPass>();      AU.addPreserved("stack-protector");      AU.addPreserved<MachineFunctionAnalysis>();    } -  virtual bool runOnFunction(Function &F); +  bool runOnFunction(Function &F) override;    static const unsigned MaxAggrCopySize = 128; -  virtual const char *getPassName() const { +  const char *getPassName() const override {      return "Lower aggregate copies/intrinsics into loops";    }  }; diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index ca24764..137248b 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -7,13 +7,14 @@  //  //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "nvptx-mcexpr"  #include "NVPTXMCExpr.h"  #include "llvm/ADT/StringExtras.h"  #include "llvm/MC/MCAssembler.h"  #include "llvm/MC/MCContext.h"  using namespace llvm; +#define DEBUG_TYPE "nvptx-mcexpr" +  const NVPTXFloatMCExpr*  NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) 
{    return new (Ctx) NVPTXFloatMCExpr(Kind, Flt); diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 0efb231..0ee018c 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -61,18 +61,18 @@ public:  /// @} -  void PrintImpl(raw_ostream &OS) const; +  void PrintImpl(raw_ostream &OS) const override;    bool EvaluateAsRelocatableImpl(MCValue &Res, -                                 const MCAsmLayout *Layout) const { +                                 const MCAsmLayout *Layout) const override {      return false;    } -  void AddValueSymbols(MCAssembler *) const {}; -  const MCSection *FindAssociatedSection() const { -    return NULL; +  void AddValueSymbols(MCAssembler *) const override {}; +  const MCSection *FindAssociatedSection() const override { +    return nullptr;    }    // There are no TLS NVPTXMCExprs at the moment. -  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} +  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}    static bool classof(const MCExpr *E) {      return E->getKind() == MCExpr::Target; diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h new file mode 100644 index 0000000..67fb390 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -0,0 +1,46 @@ +//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info  --------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source  +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This class is attached to a MachineFunction instance and tracks target- +// dependent information +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { +class NVPTXMachineFunctionInfo : public MachineFunctionInfo { +private: +  /// Stores a mapping from index to symbol name for removing image handles +  /// on Fermi. +  SmallVector<std::string, 8> ImageHandleList; + +public: +  NVPTXMachineFunctionInfo(MachineFunction &MF) {} + +  /// Returns the index for the symbol \p Symbol. If the symbol was previously, +  /// added, the same index is returned. Otherwise, the symbol is added and the +  /// new index is returned. +  unsigned getImageHandleSymbolIndex(const char *Symbol) { +    // Is the symbol already present? +    for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i) +      if (ImageHandleList[i] == std::string(Symbol)) +        return i; +    // Nope, insert it +    ImageHandleList.push_back(Symbol); +    return ImageHandleList.size()-1; +  } + +  /// Returns the symbol name at the given index. 
+  const char *getImageHandleSymbol(unsigned Idx) const { +    assert(ImageHandleList.size() > Idx && "Bad index"); +    return ImageHandleList[Idx].c_str(); +  } +}; +} diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index d5b042a..348ab0c 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -25,13 +25,15 @@  using namespace llvm; +#define DEBUG_TYPE "nvptx-prolog-epilog" +  namespace {  class NVPTXPrologEpilogPass : public MachineFunctionPass {  public:    static char ID;    NVPTXPrologEpilogPass() : MachineFunctionPass(ID) {} -  virtual bool runOnMachineFunction(MachineFunction &MF); +  bool runOnMachineFunction(MachineFunction &MF) override;  private:    void calculateFrameObjectOffsets(MachineFunction &Fn); @@ -58,7 +60,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {          if (!MI->getOperand(i).isFI())            continue; -        TRI.eliminateFrameIndex(MI, 0, i, NULL); +        TRI.eliminateFrameIndex(MI, 0, i, nullptr);          Modified = true;        }      } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 4d3a1d9..62f288b 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -11,8 +11,6 @@  //  //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "nvptx-reg-info" -  #include "NVPTXRegisterInfo.h"  #include "NVPTX.h"  #include "NVPTXSubtarget.h" @@ -25,6 +23,8 @@  using namespace llvm; +#define DEBUG_TYPE "nvptx-reg-info" +  namespace llvm {  std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {    if (RC == &NVPTX::Float32RegsRegClass) { @@ -78,19 +78,12 @@ NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)  #include "NVPTXGenRegisterInfo.inc"  /// NVPTX Callee Saved Registers 
-const uint16_t * +const MCPhysReg *  NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { -  static const uint16_t CalleeSavedRegs[] = { 0 }; +  static const MCPhysReg CalleeSavedRegs[] = { 0 };    return CalleeSavedRegs;  } -// NVPTX Callee Saved Reg Classes -const TargetRegisterClass *const * -NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { -  static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 }; -  return CalleeSavedRegClasses; -} -  BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {    BitVector Reserved(getNumRegs());    return Reserved; @@ -113,12 +106,6 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);  } -int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { -  return 0; -} -  unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {    return NVPTX::VRFrame;  } - -unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 0a20f29..a7594be 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -16,11 +16,10 @@  #include "ManagedStringPool.h"  #include "llvm/Target/TargetRegisterInfo.h" +#include <sstream>  #define GET_REGINFO_HEADER  #include "NVPTXGenRegisterInfo.inc" -#include "llvm/Target/TargetRegisterInfo.h" -#include <sstream>  namespace llvm { @@ -42,22 +41,16 @@ public:    //------------------------------------------------------    // NVPTX callee saved registers -  virtual const uint16_t * -  getCalleeSavedRegs(const MachineFunction *MF = 0) const; - -  // NVPTX callee saved register classes -  virtual const TargetRegisterClass *const * -  getCalleeSavedRegClasses(const MachineFunction *MF) const; +  const MCPhysReg * +  getCalleeSavedRegs(const MachineFunction *MF = nullptr) const 
override; -  virtual BitVector getReservedRegs(const MachineFunction &MF) const; +  BitVector getReservedRegs(const MachineFunction &MF) const override; -  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, -                                   unsigned FIOperandNum, -                                   RegScavenger *RS = NULL) const; +  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, +                           unsigned FIOperandNum, +                           RegScavenger *RS = nullptr) const override; -  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; -  virtual unsigned getFrameRegister(const MachineFunction &MF) const; -  virtual unsigned getRARegister() const; +  unsigned getFrameRegister(const MachineFunction &MF) const override;    ManagedStringPool *getStrPool() const {      return const_cast<ManagedStringPool *>(&ManagedStrPool); diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp new file mode 100644 index 0000000..afd53a6 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -0,0 +1,357 @@ +//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source  +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// On Fermi, image handles are not supported. To work around this, we traverse +// the machine code and replace image handles with concrete symbols. For this +// to work reliably, inlining of all function call must be performed. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" + +using namespace llvm; + +namespace { +class NVPTXReplaceImageHandles : public MachineFunctionPass { +private: +  static char ID; +  DenseSet<MachineInstr *> InstrsToRemove; + +public: +  NVPTXReplaceImageHandles(); + +  bool runOnMachineFunction(MachineFunction &MF) override; +private: +  bool processInstr(MachineInstr &MI); +  void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); +}; +} + +char NVPTXReplaceImageHandles::ID = 0; + +NVPTXReplaceImageHandles::NVPTXReplaceImageHandles() +  : MachineFunctionPass(ID) {} + +bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { +  bool Changed = false; +  InstrsToRemove.clear(); + +  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; +       ++BI) { +    for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); +         I != E; ++I) { +      MachineInstr &MI = *I; +      Changed |= processInstr(MI); +    } +  } + +  // Now clean up any handle-access instructions +  // This is needed in debug mode when code cleanup passes are not executed, +  // but we need the handle access to be eliminated because they are not +  // valid instructions when image handles are disabled. 
+  for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(), +       E = InstrsToRemove.end(); I != E; ++I) { +    (*I)->eraseFromParent(); +  } + +  return Changed; +} + +bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { +  MachineFunction &MF = *MI.getParent()->getParent(); +  // Check if we have a surface/texture instruction +  switch (MI.getOpcode()) { +  default: return false; +  case NVPTX::TEX_1D_F32_I32: +  case NVPTX::TEX_1D_F32_F32: +  case NVPTX::TEX_1D_F32_F32_LEVEL: +  case NVPTX::TEX_1D_F32_F32_GRAD: +  case NVPTX::TEX_1D_I32_I32: +  case NVPTX::TEX_1D_I32_F32: +  case NVPTX::TEX_1D_I32_F32_LEVEL: +  case NVPTX::TEX_1D_I32_F32_GRAD: +  case NVPTX::TEX_1D_ARRAY_F32_I32: +  case NVPTX::TEX_1D_ARRAY_F32_F32: +  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: +  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: +  case NVPTX::TEX_1D_ARRAY_I32_I32: +  case NVPTX::TEX_1D_ARRAY_I32_F32: +  case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: +  case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: +  case NVPTX::TEX_2D_F32_I32: +  case NVPTX::TEX_2D_F32_F32: +  case NVPTX::TEX_2D_F32_F32_LEVEL: +  case NVPTX::TEX_2D_F32_F32_GRAD: +  case NVPTX::TEX_2D_I32_I32: +  case NVPTX::TEX_2D_I32_F32: +  case NVPTX::TEX_2D_I32_F32_LEVEL: +  case NVPTX::TEX_2D_I32_F32_GRAD: +  case NVPTX::TEX_2D_ARRAY_F32_I32: +  case NVPTX::TEX_2D_ARRAY_F32_F32: +  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: +  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: +  case NVPTX::TEX_2D_ARRAY_I32_I32: +  case NVPTX::TEX_2D_ARRAY_I32_F32: +  case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: +  case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: +  case NVPTX::TEX_3D_F32_I32: +  case NVPTX::TEX_3D_F32_F32: +  case NVPTX::TEX_3D_F32_F32_LEVEL: +  case NVPTX::TEX_3D_F32_F32_GRAD: +  case NVPTX::TEX_3D_I32_I32: +  case NVPTX::TEX_3D_I32_F32: +  case NVPTX::TEX_3D_I32_F32_LEVEL: +  case NVPTX::TEX_3D_I32_F32_GRAD: { +    // This is a texture fetch, so operand 4 is a texref and operand 5 is +    // a samplerref +    MachineOperand &TexHandle = 
MI.getOperand(4); +    MachineOperand &SampHandle = MI.getOperand(5); + +    replaceImageHandle(TexHandle, MF); +    replaceImageHandle(SampHandle, MF); + +    return true; +  } +  case NVPTX::SULD_1D_I8_TRAP: +  case NVPTX::SULD_1D_I16_TRAP: +  case NVPTX::SULD_1D_I32_TRAP: +  case NVPTX::SULD_1D_ARRAY_I8_TRAP: +  case NVPTX::SULD_1D_ARRAY_I16_TRAP: +  case NVPTX::SULD_1D_ARRAY_I32_TRAP: +  case NVPTX::SULD_2D_I8_TRAP: +  case NVPTX::SULD_2D_I16_TRAP: +  case NVPTX::SULD_2D_I32_TRAP: +  case NVPTX::SULD_2D_ARRAY_I8_TRAP: +  case NVPTX::SULD_2D_ARRAY_I16_TRAP: +  case NVPTX::SULD_2D_ARRAY_I32_TRAP: +  case NVPTX::SULD_3D_I8_TRAP: +  case NVPTX::SULD_3D_I16_TRAP: +  case NVPTX::SULD_3D_I32_TRAP: { +    // This is a V1 surface load, so operand 1 is a surfref +    MachineOperand &SurfHandle = MI.getOperand(1); + +    replaceImageHandle(SurfHandle, MF); + +    return true; +  } +  case NVPTX::SULD_1D_V2I8_TRAP: +  case NVPTX::SULD_1D_V2I16_TRAP: +  case NVPTX::SULD_1D_V2I32_TRAP: +  case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: +  case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: +  case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: +  case NVPTX::SULD_2D_V2I8_TRAP: +  case NVPTX::SULD_2D_V2I16_TRAP: +  case NVPTX::SULD_2D_V2I32_TRAP: +  case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: +  case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: +  case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: +  case NVPTX::SULD_3D_V2I8_TRAP: +  case NVPTX::SULD_3D_V2I16_TRAP: +  case NVPTX::SULD_3D_V2I32_TRAP: { +    // This is a V2 surface load, so operand 2 is a surfref +    MachineOperand &SurfHandle = MI.getOperand(2); + +    replaceImageHandle(SurfHandle, MF); + +    return true; +  } +  case NVPTX::SULD_1D_V4I8_TRAP: +  case NVPTX::SULD_1D_V4I16_TRAP: +  case NVPTX::SULD_1D_V4I32_TRAP: +  case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: +  case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: +  case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: +  case NVPTX::SULD_2D_V4I8_TRAP: +  case NVPTX::SULD_2D_V4I16_TRAP: +  case NVPTX::SULD_2D_V4I32_TRAP: +  case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: +  
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: +  case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: +  case NVPTX::SULD_3D_V4I8_TRAP: +  case NVPTX::SULD_3D_V4I16_TRAP: +  case NVPTX::SULD_3D_V4I32_TRAP: { +    // This is a V4 surface load, so operand 4 is a surfref +    MachineOperand &SurfHandle = MI.getOperand(4); + +    replaceImageHandle(SurfHandle, MF); + +    return true; +  } +  case NVPTX::SUST_B_1D_B8_TRAP: +  case NVPTX::SUST_B_1D_B16_TRAP: +  case NVPTX::SUST_B_1D_B32_TRAP: +  case NVPTX::SUST_B_1D_V2B8_TRAP: +  case NVPTX::SUST_B_1D_V2B16_TRAP: +  case NVPTX::SUST_B_1D_V2B32_TRAP: +  case NVPTX::SUST_B_1D_V4B8_TRAP: +  case NVPTX::SUST_B_1D_V4B16_TRAP: +  case NVPTX::SUST_B_1D_V4B32_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_B_2D_B8_TRAP: +  case NVPTX::SUST_B_2D_B16_TRAP: +  case NVPTX::SUST_B_2D_B32_TRAP: +  case NVPTX::SUST_B_2D_V2B8_TRAP: +  case NVPTX::SUST_B_2D_V2B16_TRAP: +  case NVPTX::SUST_B_2D_V2B32_TRAP: +  case NVPTX::SUST_B_2D_V4B8_TRAP: +  case NVPTX::SUST_B_2D_V4B16_TRAP: +  case NVPTX::SUST_B_2D_V4B32_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_B_3D_B8_TRAP: +  case NVPTX::SUST_B_3D_B16_TRAP: +  case NVPTX::SUST_B_3D_B32_TRAP: +  case NVPTX::SUST_B_3D_V2B8_TRAP: +  case NVPTX::SUST_B_3D_V2B16_TRAP: +  case NVPTX::SUST_B_3D_V2B32_TRAP: +  case 
NVPTX::SUST_B_3D_V4B8_TRAP: +  case NVPTX::SUST_B_3D_V4B16_TRAP: +  case NVPTX::SUST_B_3D_V4B32_TRAP: +  case NVPTX::SUST_P_1D_B8_TRAP: +  case NVPTX::SUST_P_1D_B16_TRAP: +  case NVPTX::SUST_P_1D_B32_TRAP: +  case NVPTX::SUST_P_1D_V2B8_TRAP: +  case NVPTX::SUST_P_1D_V2B16_TRAP: +  case NVPTX::SUST_P_1D_V2B32_TRAP: +  case NVPTX::SUST_P_1D_V4B8_TRAP: +  case NVPTX::SUST_P_1D_V4B16_TRAP: +  case NVPTX::SUST_P_1D_V4B32_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_P_2D_B8_TRAP: +  case NVPTX::SUST_P_2D_B16_TRAP: +  case NVPTX::SUST_P_2D_B32_TRAP: +  case NVPTX::SUST_P_2D_V2B8_TRAP: +  case NVPTX::SUST_P_2D_V2B16_TRAP: +  case NVPTX::SUST_P_2D_V2B32_TRAP: +  case NVPTX::SUST_P_2D_V4B8_TRAP: +  case NVPTX::SUST_P_2D_V4B16_TRAP: +  case NVPTX::SUST_P_2D_V4B32_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: +  case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: +  case NVPTX::SUST_P_3D_B8_TRAP: +  case NVPTX::SUST_P_3D_B16_TRAP: +  case NVPTX::SUST_P_3D_B32_TRAP: +  case NVPTX::SUST_P_3D_V2B8_TRAP: +  case NVPTX::SUST_P_3D_V2B16_TRAP: +  case NVPTX::SUST_P_3D_V2B32_TRAP: +  case NVPTX::SUST_P_3D_V4B8_TRAP: +  case NVPTX::SUST_P_3D_V4B16_TRAP: +  case NVPTX::SUST_P_3D_V4B32_TRAP: { +    // This is a surface store, so operand 0 is a surfref +    MachineOperand &SurfHandle = MI.getOperand(0); + +    replaceImageHandle(SurfHandle, MF); + +    return 
true; +  } +  case NVPTX::TXQ_CHANNEL_ORDER: +  case NVPTX::TXQ_CHANNEL_DATA_TYPE: +  case NVPTX::TXQ_WIDTH: +  case NVPTX::TXQ_HEIGHT: +  case NVPTX::TXQ_DEPTH: +  case NVPTX::TXQ_ARRAY_SIZE: +  case NVPTX::TXQ_NUM_SAMPLES: +  case NVPTX::TXQ_NUM_MIPMAP_LEVELS: +  case NVPTX::SUQ_CHANNEL_ORDER: +  case NVPTX::SUQ_CHANNEL_DATA_TYPE: +  case NVPTX::SUQ_WIDTH: +  case NVPTX::SUQ_HEIGHT: +  case NVPTX::SUQ_DEPTH: +  case NVPTX::SUQ_ARRAY_SIZE: { +    // This is a query, so operand 1 is a surfref/texref +    MachineOperand &Handle = MI.getOperand(1); + +    replaceImageHandle(Handle, MF); + +    return true;  +  } +  } +} + +void NVPTXReplaceImageHandles:: +replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { +  const MachineRegisterInfo &MRI = MF.getRegInfo(); +  NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>(); +  // Which instruction defines the handle? +  MachineInstr *MI = MRI.getVRegDef(Op.getReg()); +  assert(MI && "No def for image handle vreg?"); +  MachineInstr &TexHandleDef = *MI; + +  switch (TexHandleDef.getOpcode()) { +  case NVPTX::LD_i64_avar: { +    // The handle is a parameter value being loaded, replace with the +    // parameter symbol +    assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!"); +    StringRef Sym = TexHandleDef.getOperand(6).getSymbolName(); +    std::string ParamBaseName = MF.getName(); +    ParamBaseName += "_param_"; +    assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference"); +    unsigned Param = atoi(Sym.data()+ParamBaseName.size()); +    std::string NewSym; +    raw_string_ostream NewSymStr(NewSym); +    NewSymStr << MF.getFunction()->getName() << "_param_" << Param; +    Op.ChangeToImmediate( +      MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str())); +    InstrsToRemove.insert(&TexHandleDef); +    break; +  } +  case NVPTX::texsurf_handles: { +    // The handle is a global variable, replace with the global variable name +    
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!"); +    const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal(); +    assert(GV->hasName() && "Global sampler must be named!"); +    Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data())); +    InstrsToRemove.insert(&TexHandleDef); +    break; +  } +  default: +    llvm_unreachable("Unknown instruction operating on handle"); +  } +} + +MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() { +  return new NVPTXReplaceImageHandles(); +} diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index f8a692e..aa0436b 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -31,16 +31,16 @@ public:    /// Override this as NVPTX has its own way of printing switching    /// to a section. -  virtual void PrintSwitchToSection(const MCAsmInfo &MAI, -                                    raw_ostream &OS, -                                    const MCExpr *Subsection) const {} +  void PrintSwitchToSection(const MCAsmInfo &MAI, +                            raw_ostream &OS, +                            const MCExpr *Subsection) const override {}    /// Base address of PTX sections is zero. 
-  virtual bool isBaseAddressKnownZero() const { return true; } -  virtual bool UseCodeAlign() const { return false; } -  virtual bool isVirtualSection() const { return false; } -  virtual std::string getLabelBeginName() const { return ""; } -  virtual std::string getLabelEndName() const { return ""; } +  bool isBaseAddressKnownZero() const override { return true; } +  bool UseCodeAlign() const override { return false; } +  bool isVirtualSection() const override { return false; } +  std::string getLabelBeginName() const override { return ""; } +  std::string getLabelEndName() const override { return ""; }  };  } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 9771a17..8c7df52 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -12,14 +12,16 @@  //===----------------------------------------------------------------------===//  #include "NVPTXSubtarget.h" + +using namespace llvm; + +#define DEBUG_TYPE "nvptx-subtarget" +  #define GET_SUBTARGETINFO_ENUM  #define GET_SUBTARGETINFO_TARGET_DESC  #define GET_SUBTARGETINFO_CTOR  #include "NVPTXGenSubtargetInfo.inc" -using namespace llvm; - -  // Pin the vtable to this file.  
void NVPTXSubtarget::anchor() {} diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index f99bebd..581e5ed 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -16,12 +16,11 @@  #include "NVPTX.h"  #include "llvm/Target/TargetSubtargetInfo.h" +#include <string>  #define GET_SUBTARGETINFO_HEADER  #include "NVPTXGenSubtargetInfo.inc" -#include <string> -  namespace llvm {  class NVPTXSubtarget : public NVPTXGenSubtargetInfo { @@ -65,6 +64,10 @@ public:    inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }    inline bool hasROT64() const { return SmVersion >= 20; } +  bool hasImageHandles() const { +    // Currently disabled +    return false; +  }    bool is64Bit() const { return Is64Bit; }    unsigned int getSmVersion() const { return SmVersion; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7d7d793..26a4f84 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -16,7 +16,6 @@  #include "NVPTX.h"  #include "NVPTXAllocaHoisting.h"  #include "NVPTXLowerAggrCopies.h" -#include "llvm/ADT/OwningPtr.h"  #include "llvm/Analysis/Passes.h"  #include "llvm/CodeGen/AsmPrinter.h"  #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -50,6 +49,7 @@ namespace llvm {  void initializeNVVMReflectPass(PassRegistry&);  void initializeGenericToNVVMPass(PassRegistry&);  void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); +void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);  }  extern "C" void LLVMInitializeNVPTXTarget() { @@ -62,6 +62,8 @@ extern "C" void LLVMInitializeNVPTXTarget() {    initializeNVVMReflectPass(*PassRegistry::getPassRegistry());    initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());    initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); +  initializeNVPTXFavorNonGenericAddrSpacesPass( +    
*PassRegistry::getPassRegistry());  }  static std::string computeDataLayout(const NVPTXSubtarget &ST) { @@ -113,14 +115,14 @@ public:      return getTM<NVPTXTargetMachine>();    } -  virtual void addIRPasses(); -  virtual bool addInstSelector(); -  virtual bool addPreRegAlloc(); -  virtual bool addPostRegAlloc(); +  void addIRPasses() override; +  bool addInstSelector() override; +  bool addPreRegAlloc() override; +  bool addPostRegAlloc() override; -  virtual FunctionPass *createTargetRegisterAllocator(bool) override; -  virtual void addFastRegAlloc(FunctionPass *RegAllocPass); -  virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); +  FunctionPass *createTargetRegisterAllocator(bool) override; +  void addFastRegAlloc(FunctionPass *RegAllocPass) override; +  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;  };  } // end anonymous namespace @@ -140,15 +142,42 @@ void NVPTXPassConfig::addIRPasses() {    disablePass(&BranchFolderPassID);    disablePass(&TailDuplicateID); +  addPass(createNVPTXImageOptimizerPass());    TargetPassConfig::addIRPasses();    addPass(createNVPTXAssignValidGlobalNamesPass());    addPass(createGenericToNVVMPass()); +  addPass(createNVPTXFavorNonGenericAddrSpacesPass()); +  addPass(createSeparateConstOffsetFromGEPPass()); +  // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used +  // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates +  // significantly better code than EarlyCSE for some of our benchmarks. +  if (getOptLevel() == CodeGenOpt::Aggressive) +    addPass(createGVNPass()); +  else +    addPass(createEarlyCSEPass()); +  // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave +  // some dead code.  We could remove dead code in an ad-hoc manner, but that +  // requires manual work and might be error-prone. +  // +  // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, +  // and leave them unused. 
+  // +  // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the +  // old index and some of its intermediate results may become unused. +  addPass(createDeadCodeEliminationPass());  }  bool NVPTXPassConfig::addInstSelector() { +  const NVPTXSubtarget &ST = +    getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); +    addPass(createLowerAggrCopies());    addPass(createAllocaHoisting());    addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); + +  if (!ST.hasImageHandles()) +    addPass(createNVPTXReplaceImageHandlesPass()); +    return false;  } @@ -159,7 +188,7 @@ bool NVPTXPassConfig::addPostRegAlloc() {  }  FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { -  return 0; // No reg alloc +  return nullptr; // No reg alloc  }  void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 5fbcf73..2db7c18 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -51,22 +51,22 @@ public:                       const TargetOptions &Options, Reloc::Model RM,                       CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); -  virtual const TargetFrameLowering *getFrameLowering() const { +  const TargetFrameLowering *getFrameLowering() const override {      return &FrameLowering;    } -  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } -  virtual const DataLayout *getDataLayout() const { return &DL; } -  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } +  const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; } +  const DataLayout *getDataLayout() const override { return &DL; } +  const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } -  virtual const NVPTXRegisterInfo *getRegisterInfo() const { +  const NVPTXRegisterInfo *getRegisterInfo() const override {   
   return &(InstrInfo.getRegisterInfo());    } -  virtual NVPTXTargetLowering *getTargetLowering() const { +  NVPTXTargetLowering *getTargetLowering() const override {      return const_cast<NVPTXTargetLowering *>(&TLInfo);    } -  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { +  const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {      return &TSInfo;    } @@ -79,17 +79,17 @@ public:      return const_cast<ManagedStringPool *>(&ManagedStrPool);    } -  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); +  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;    // Emission of machine code through JITCodeEmitter is not supported. -  virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, -                                          bool = true) { +  bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, +                                  bool = true) override {      return true;    }    // Emission of machine code through MCJIT is not supported. 
-  virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, -                                 bool = true) { +  bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, +                         bool = true) override {      return true;    } diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 2a7281e..0b438c5 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -22,26 +22,26 @@ class NVPTXTargetObjectFile : public TargetLoweringObjectFile {  public:    NVPTXTargetObjectFile() { -    TextSection = 0; -    DataSection = 0; -    BSSSection = 0; -    ReadOnlySection = 0; +    TextSection = nullptr; +    DataSection = nullptr; +    BSSSection = nullptr; +    ReadOnlySection = nullptr; -    StaticCtorSection = 0; -    StaticDtorSection = 0; -    LSDASection = 0; -    EHFrameSection = 0; -    DwarfAbbrevSection = 0; -    DwarfInfoSection = 0; -    DwarfLineSection = 0; -    DwarfFrameSection = 0; -    DwarfPubTypesSection = 0; -    DwarfDebugInlineSection = 0; -    DwarfStrSection = 0; -    DwarfLocSection = 0; -    DwarfARangesSection = 0; -    DwarfRangesSection = 0; -    DwarfMacroInfoSection = 0; +    StaticCtorSection = nullptr; +    StaticDtorSection = nullptr; +    LSDASection = nullptr; +    EHFrameSection = nullptr; +    DwarfAbbrevSection = nullptr; +    DwarfInfoSection = nullptr; +    DwarfLineSection = nullptr; +    DwarfFrameSection = nullptr; +    DwarfPubTypesSection = nullptr; +    DwarfDebugInlineSection = nullptr; +    DwarfStrSection = nullptr; +    DwarfLocSection = nullptr; +    DwarfARangesSection = nullptr; +    DwarfRangesSection = nullptr; +    DwarfMacroInfoSection = nullptr;    }    virtual ~NVPTXTargetObjectFile(); diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 60a5173..a9fd190b 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ 
b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -22,9 +22,9 @@  #include <map>  #include <string>  #include <vector> -//#include <iostream>  #include "llvm/Support/ManagedStatic.h"  #include "llvm/IR/InstIterator.h" +#include "llvm/Support/MutexGuard.h"  using namespace llvm; @@ -33,8 +33,15 @@ typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;  typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;  ManagedStatic<per_module_annot_t> annotationCache; +static sys::Mutex Lock; + +void llvm::clearAnnotationCache(const llvm::Module *Mod) { +  MutexGuard Guard(Lock); +  annotationCache->erase(Mod); +}  static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { +  MutexGuard Guard(Lock);    assert(md && "Invalid mdnode for annotation");    assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");    // start index = 1, to skip the global variable key @@ -60,6 +67,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {  }  static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { +  MutexGuard Guard(Lock);    NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);    if (!NMD)      return; @@ -92,6 +100,7 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {  bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,                                   unsigned &retval) { +  MutexGuard Guard(Lock);    const Module *m = gv->getParent();    if ((*annotationCache).find(m) == (*annotationCache).end())      cacheAnnotationFromMD(m, gv); @@ -105,6 +114,7 @@ bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,  bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,                                   std::vector<unsigned> &retval) { +  MutexGuard Guard(Lock);    const Module *m = gv->getParent();    if ((*annotationCache).find(m) == (*annotationCache).end())      
cacheAnnotationFromMD(m, gv); @@ -195,8 +205,37 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {    return false;  } +bool llvm::isImageReadWrite(const llvm::Value &val) { +  if (const Argument *arg = dyn_cast<Argument>(&val)) { +    const Function *func = arg->getParent(); +    std::vector<unsigned> annot; +    if (llvm::findAllNVVMAnnotation(func, +                                    llvm::PropertyAnnotationNames[ +                                        llvm::PROPERTY_ISREADWRITE_IMAGE_PARAM], +                                    annot)) { +      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) +        return true; +    } +  } +  return false; +} +  bool llvm::isImage(const llvm::Value &val) { -  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val); +  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val) || +         llvm::isImageReadWrite(val); +} + +bool llvm::isManaged(const llvm::Value &val) { +  if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { +    unsigned annot; +    if(llvm::findOneNVVMAnnotation(gv, +                          llvm::PropertyAnnotationNames[llvm::PROPERTY_MANAGED], +                                   annot)) { +      assert((annot == 1) && "Unexpected annotation on a managed symbol"); +      return true; +    } +  } +  return false;  }  std::string llvm::getTextureName(const llvm::Value &val) { @@ -354,12 +393,12 @@ llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {  const Value *  llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {    if (processed.find(V) != processed.end()) -    return NULL; +    return nullptr;    processed.insert(V);    const Value *V2 = V->stripPointerCasts();    if (V2 != V && processed.find(V2) != processed.end()) -    return NULL; +    return nullptr;    processed.insert(V2);    V = V2; @@ -375,20 +414,20 @@ llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {        
continue;      } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {        if (V != V2 && processed.find(V) != processed.end()) -        return NULL; +        return nullptr;        processed.insert(PN); -      const Value *common = 0; +      const Value *common = nullptr;        for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {          const Value *pv = PN->getIncomingValue(i);          const Value *base = skipPointerTransfer(pv, processed);          if (base) { -          if (common == 0) +          if (!common)              common = base;            else if (common != base)              return PN;          }        } -      if (common == 0) +      if (!common)          return PN;        V = common;      } @@ -406,7 +445,7 @@ BasicBlock *llvm::getParentBlock(Value *v) {    if (Instruction *I = dyn_cast<Instruction>(v))      return I->getParent(); -  return 0; +  return nullptr;  }  Function *llvm::getParentFunction(Value *v) { @@ -419,13 +458,13 @@ Function *llvm::getParentFunction(Value *v) {    if (BasicBlock *B = dyn_cast<BasicBlock>(v))      return B->getParent(); -  return 0; +  return nullptr;  }  // Dump a block by name  void llvm::dumpBlock(Value *v, char *blockName) {    Function *F = getParentFunction(v); -  if (F == 0) +  if (!F)      return;    for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) { @@ -440,8 +479,8 @@ void llvm::dumpBlock(Value *v, char *blockName) {  // Find an instruction by name  Instruction *llvm::getInst(Value *base, char *instName) {    Function *F = getParentFunction(base); -  if (F == 0) -    return 0; +  if (!F) +    return nullptr;    for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {      Instruction *I = &*it; @@ -450,7 +489,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {      }    } -  return 0; +  return nullptr;  }  // Dump an instruction by nane diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 
a208004..446bfa1 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -28,6 +28,8 @@ namespace llvm {  #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"  #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly" +void clearAnnotationCache(const llvm::Module *); +  bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);  bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,                             std::vector<unsigned> &); @@ -38,6 +40,8 @@ bool isSampler(const llvm::Value &);  bool isImage(const llvm::Value &);  bool isImageReadOnly(const llvm::Value &);  bool isImageWriteOnly(const llvm::Value &); +bool isImageReadWrite(const llvm::Value &); +bool isManaged(const llvm::Value &);  std::string getTextureName(const llvm::Value &);  std::string getSurfaceName(const llvm::Value &); diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index 8b5444a..cb8bd72 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -38,6 +38,8 @@  using namespace llvm; +#define DEBUG_TYPE "nvptx-reflect" +  namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }  namespace { @@ -49,13 +51,13 @@ private:  public:    static char ID; -  NVVMReflect() : ModulePass(ID), ReflectFunction(0) { +  NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) {      initializeNVVMReflectPass(*PassRegistry::getPassRegistry());      VarMap.clear();    }    NVVMReflect(const StringMap<int> &Mapping) -  : ModulePass(ID), ReflectFunction(0) { +  : ModulePass(ID), ReflectFunction(nullptr) {      initializeNVVMReflectPass(*PassRegistry::getPassRegistry());      for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();           I != E; ++I) { @@ -63,8 +65,10 @@ public:      }    } -  void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } -  virtual bool runOnModule(Module &); +  void 
getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.setPreservesAll(); +  } +  bool runOnModule(Module &) override;    void setVarMap();  }; @@ -126,7 +130,7 @@ bool NVVMReflect::runOnModule(Module &M) {    // If reflect function is not used, then there will be    // no entry in the module. -  if (ReflectFunction == 0) +  if (!ReflectFunction)      return false;    // Validate _reflect function | 
